From a36284ca711bec7ece769b91059181f26846ec33 Mon Sep 17 00:00:00 2001 From: ZhenWang Date: Fri, 21 Jun 2019 20:56:00 +0800 Subject: [PATCH] add cl_caller. --- paddle/fluid/lite/opencl/CMakeLists.txt | 6 +- paddle/fluid/lite/opencl/cl_caller.cc | 88 +++++++++++++++++++ paddle/fluid/lite/opencl/cl_caller.h | 30 +++++++ paddle/fluid/lite/opencl/cl_engine.cc | 4 +- paddle/fluid/lite/opencl/cl_helper.cc | 36 +++++--- paddle/fluid/lite/opencl/cl_helper.h | 6 +- paddle/fluid/lite/opencl/cl_image.cc | 24 ++--- paddle/fluid/lite/opencl/cl_image.h | 8 +- paddle/fluid/lite/opencl/cl_test.cc | 111 +++++++++++++++++++++++- 9 files changed, 279 insertions(+), 34 deletions(-) create mode 100644 paddle/fluid/lite/opencl/cl_caller.cc create mode 100644 paddle/fluid/lite/opencl/cl_caller.h diff --git a/paddle/fluid/lite/opencl/CMakeLists.txt b/paddle/fluid/lite/opencl/CMakeLists.txt index 00c0e35b074..06072a1067d 100644 --- a/paddle/fluid/lite/opencl/CMakeLists.txt +++ b/paddle/fluid/lite/opencl/CMakeLists.txt @@ -7,7 +7,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) set_target_properties(opencl-lib PROPERTIES IMPORTED_LOCATION - ${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libOpenCL.so) + #${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libOpenCL.so) + ${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libGLES_mali.so) cc_library(cl_tool SRCS cl_tool.cc) target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers) @@ -18,7 +19,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context) cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor) cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine) - lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context) + cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image) + lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller) target_link_libraries(test_cl_runtime opencl-lib) add_dependencies(cl_tool opencl_clhpp) endif() diff --git a/paddle/fluid/lite/opencl/cl_caller.cc b/paddle/fluid/lite/opencl/cl_caller.cc new file mode 100644 index 00000000000..fbb970dea2d --- /dev/null +++ b/paddle/fluid/lite/opencl/cl_caller.cc @@ -0,0 +1,88 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/lite/opencl/cl_caller.h" +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/opencl/cl_context.h" +#include "paddle/fluid/lite/opencl/cl_engine.h" +#include "paddle/fluid/lite/opencl/cl_helper.h" +#include "paddle/fluid/lite/opencl/cl_image.h" +#include "paddle/fluid/lite/opencl/cl_tool.h" + +namespace paddle { +namespace lite { +static void CopyImageData(const CLImage& cl_image, float* out) { + int width = cl_image.image_dims()[0]; + int height = cl_image.image_dims()[1]; + + half_t* image_data = new half_t[height * width * 4]; + cl::Image* image = cl_image.cl_image(); + const std::array origin{0, 0, 0}; + const std::array region{static_cast(width), + static_cast(height), 1}; + cl_int err = CLEngine::Global()->command_queue().enqueueReadImage( + *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr); + CL_CHECK_ERRORS(err); + + auto* converter = cl_image.image_converter(); + converter->ImageToNCHW(image_data, out, cl_image.image_dims(), + cl_image.tensor_dims()); + + delete[] image_data; +} + +bool InitOpenCLEngine(std::string cl_path) { + auto* engine = CLEngine::Global(); + engine->set_cl_path(cl_path); + return engine->IsInitSuccess(); +} + +void elementwise_add(CLContext* context, float* in, const DDim& in_dim, + float* bias, const DDim& bias_dim, float* out, + const DDim& out_dim) { + CLHelper helper(context); + helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl"); + auto kernel = helper.KernelAt(0); + CLImage in_image; + in_image.set_tensor_data(in, in_dim); + in_image.InitNormalCLImage(helper.OpenCLContext()); + VLOG(3) << " --- Inpu image: " << in_image << " --- "; + CLImage bias_image; + bias_image.set_tensor_data(bias, bias_dim); + bias_image.InitNormalCLImage(helper.OpenCLContext()); + VLOG(3) << " --- Bias image: " << bias_image << " --- "; + CLImage out_image; + out_image.InitEmptyImage(helper.OpenCLContext(), out_dim); + cl_int status; + status = kernel.setArg(0, *in_image.cl_image()); + CL_CHECK_ERRORS(status); + status = kernel.setArg(1, *bias_image.cl_image()); + CL_CHECK_ERRORS(status); + status = kernel.setArg(2, *out_image.cl_image()); + CL_CHECK_ERRORS(status); + size_t width = in_image.ImageWidth(); + size_t height = in_image.ImageHeight(); + auto global_work_size = cl::NDRange{width, height}; + status = helper.OpenCLCommandQueue().enqueueNDRangeKernel( + kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr); + CL_CHECK_ERRORS(status); + + VLOG(3) << " --- Out image: " << out_image << " --- "; + + CopyImageData(out_image, out); +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/opencl/cl_caller.h b/paddle/fluid/lite/opencl/cl_caller.h new file mode 100644 index 00000000000..ed9b879fae2 --- /dev/null +++ b/paddle/fluid/lite/opencl/cl_caller.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/opencl/cl_context.h" + +namespace paddle { +namespace lite { + +bool InitOpenCLEngine(std::string cl_path); +void elementwise_add(CLContext* context, float* in, const DDim& in_dim, + float* bias, const DDim& bias_dim, float* out, + const DDim& out_dim); + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/opencl/cl_engine.cc b/paddle/fluid/lite/opencl/cl_engine.cc index 97301ecba28..be82ba23cbb 100644 --- a/paddle/fluid/lite/opencl/cl_engine.cc +++ b/paddle/fluid/lite/opencl/cl_engine.cc @@ -133,10 +133,10 @@ bool CLEngine::InitializePlatform() { bool CLEngine::InitializeDevice() { std::vector all_devices; - status_ = platform_->getDevices(CL_DEVICE_TYPE_DEFAULT, &all_devices); + status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices); CL_CHECK_ERRORS(status_); if (all_devices.empty()) { - LOG(ERROR) << "No OpenCL device found!"; + LOG(ERROR) << "No OpenCL GPU device found!"; return false; } device_ = std::make_shared(); diff --git a/paddle/fluid/lite/opencl/cl_helper.cc b/paddle/fluid/lite/opencl/cl_helper.cc index 2a886d0b06e..116828c153d 100644 --- a/paddle/fluid/lite/opencl/cl_helper.cc +++ b/paddle/fluid/lite/opencl/cl_helper.cc @@ -21,9 +21,12 @@ limitations under the License. */ namespace paddle { namespace lite { +void CLHelper::set_context(CLContext *context) { context_ = context; } + void CLHelper::AddKernel(const std::string &kernel_name, const std::string &file_name, const std::string &options) { + CHECK(context_ != nullptr) << "Please use set_context first!"; VLOG(3) << " --- begin to add kernel ---"; auto kernel = context_->GetKernel(kernel_name, file_name, options); kernels.emplace_back(std::move(kernel)); @@ -32,16 +35,24 @@ void CLHelper::AddKernel(const std::string &kernel_name, cl::Kernel &CLHelper::KernelAt(const int index) { VLOG(3) << " --- kernel count: " << kernels.size() << " --- "; + CHECK(static_cast(index) < kernels.size()) + << "The index must be less than the size of kernels."; + CHECK(kernels[index] != nullptr) + << "The target kernel pointer cannot be null."; return *(kernels[index]); } cl::CommandQueue &CLHelper::OpenCLCommandQueue() { + CHECK(context_ != nullptr) << "Please use set_context first!"; return context_->GetCommandQueue(); } -cl::Context &CLHelper::OpenCLContext() { return context_->GetContext(); } +cl::Context &CLHelper::OpenCLContext() { + CHECK(context_ != nullptr) << "Please use set_context first!"; + return context_->GetContext(); +} -std::vector CLHelper::DefaultWorkSize(const CLImage &image) { +cl::NDRange CLHelper::DefaultWorkSize(const CLImage &image) { // n c h w auto image_dim = image.tensor_dims(); if (image_dim.size() == 4) { @@ -52,23 +63,26 @@ std::vector CLHelper::DefaultWorkSize(const CLImage &image) { auto work_size_0 = image_width / w; auto work_size_1 = w; auto work_size_2 = n * h; - return {static_cast(work_size_0), static_cast(work_size_1), - static_cast(work_size_2)}; + return cl::NDRange{static_cast(work_size_0), + static_cast(work_size_1), + static_cast(work_size_2)}; } else if (image_dim.size() == 2) { - return {static_cast(1), static_cast(image.ImageWidth()), - static_cast(image.ImageHeight())}; + return cl::NDRange{static_cast(1), + static_cast(image.ImageWidth()), + static_cast(image.ImageHeight())}; } else if (image_dim.size() == 1) { - return {static_cast(1), static_cast(image.ImageWidth()), - static_cast(1)}; + return cl::NDRange{static_cast(1), + static_cast(image.ImageWidth()), + static_cast(1)}; } else if (image_dim.size() == 3) { auto c = image_dim[0]; auto h = image_dim[1]; auto w = image_dim[2]; - return {static_cast((c + 3) / 4), static_cast(w), - static_cast(h)}; + return cl::NDRange{static_cast((c + 3) / 4), static_cast(w), + static_cast(h)}; } else { LOG(FATAL) << "Not support this dimension, need to be implemented!"; - return {}; + return cl::NDRange{}; } } diff --git a/paddle/fluid/lite/opencl/cl_helper.h b/paddle/fluid/lite/opencl/cl_helper.h index fce1929db59..f6f89fb6fda 100644 --- a/paddle/fluid/lite/opencl/cl_helper.h +++ b/paddle/fluid/lite/opencl/cl_helper.h @@ -30,6 +30,8 @@ class CLHelper { explicit CLHelper(CLContext *context) : context_(context) {} + void set_context(CLContext *context); + void AddKernel(const std::string &kernel_name, const std::string &file_name, const std::string &options = ""); @@ -39,10 +41,10 @@ class CLHelper { cl::Context &OpenCLContext(); - std::vector DefaultWorkSize(const CLImage &image); + cl::NDRange DefaultWorkSize(const CLImage &image); private: - CLContext *context_; + CLContext *context_{nullptr}; std::vector> kernels; }; diff --git a/paddle/fluid/lite/opencl/cl_image.cc b/paddle/fluid/lite/opencl/cl_image.cc index 8edcf000b03..2c551d6cd33 100644 --- a/paddle/fluid/lite/opencl/cl_image.cc +++ b/paddle/fluid/lite/opencl/cl_image.cc @@ -27,12 +27,12 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) { int height = cl_image.image_dims_[1]; half_t* image_data = new half_t[height * width * 4]; - cl::Image2D& image = cl_image.cl_image(); + cl::Image* image = cl_image.cl_image(); const std::array origin{0, 0, 0}; const std::array region{static_cast(width), static_cast(height), 1}; cl_int err = CLEngine::Global()->command_queue().enqueueReadImage( - image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr); + *image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr); CL_CHECK_ERRORS(err); float* tensor_data = new float[cl_image.numel()]; @@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) { return os; } -void CLImage::SetTensorData(float* tensor_data, const DDim& dim) { +void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) { #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK auto numel = dim.product(); #else @@ -65,28 +65,30 @@ void CLImage::SetTensorData(float* tensor_data, const DDim& dim) { } void CLImage::InitCLImage(const cl::Context& context) { - CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!"; + CHECK(tensor_data_ != nullptr) << " Please call " + "set_tensohelper->DefaultWorkSize(out_" + "image)r_data first!"; image_converter_.reset(new CLImageConverterFolder); InitCLImage(context, image_converter_.get()); } void CLImage::InitNormalCLImage(const cl::Context& context) { - CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!"; + CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!"; image_converter_.reset(new CLImageConverterNormal); InitCLImage(context, image_converter_.get()); } void CLImage::InitNImage(const cl::Context& context) { - CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!"; + CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!"; CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4."; - image_converter_.reset(new CLImageConverterNWBlock()); + image_converter_.reset(new CLImageConverterNWBlock); InitCLImage(context, image_converter_.get()); } void CLImage::InitDWImage(const cl::Context& context) { - CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!"; + CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!"; CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4."; - image_converter_.reset(new CLImageConverterDWBlock()); + image_converter_.reset(new CLImageConverterDWBlock); InitCLImage(context, image_converter_.get()); } @@ -95,7 +97,7 @@ void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) { << " Empty image tensor data shouldn't have value"; tensor_dims_ = dim; - image_converter_.reset(new CLImageConverterNormal()); + image_converter_.reset(new CLImageConverterNormal); VLOG(3) << " to get image dims "; image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_); @@ -123,7 +125,7 @@ void CLImage::InitEmptyWithImageDim(const cl::Context& context, void CLImage::InitCLImage(const cl::Context& context, CLImageConverterBase* converter) { - CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!"; + CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!"; VLOG(3) << " begin init cl image "; image_dims_ = converter->InitImageDimInfoWith(tensor_dims_); diff --git a/paddle/fluid/lite/opencl/cl_image.h b/paddle/fluid/lite/opencl/cl_image.h index 9b827d9818a..627e503168e 100644 --- a/paddle/fluid/lite/opencl/cl_image.h +++ b/paddle/fluid/lite/opencl/cl_image.h @@ -33,11 +33,11 @@ class CLImage { /* * Will not hold input tensor data, memcpy in this method. * */ - void SetTensorData(float* tensor_data, const DDim& dim); + void set_tensor_data(float* tensor_data, const DDim& dim); bool IsInit() { return initialized_; } /* - * Need call SetTensorData first. + * Need call set_tensor_data first. * Folder when one dim or two dim. * */ void InitCLImage(const cl::Context& context); @@ -53,7 +53,7 @@ class CLImage { void InitEmptyWithImageDim(const cl::Context& context, const DDim& image_dims); - cl::Image2D& cl_image() const { return *cl_image_; } + cl::Image* cl_image() const { return cl_image_.get(); } const DDim& image_dims() const { return image_dims_; } @@ -63,7 +63,7 @@ class CLImage { const DDim& tensor_dims() const { return tensor_dims_; } - /* + /*with_da * Resize original tensor dim. * */ inline CLImage& Resize(const DDim& dims) { diff --git a/paddle/fluid/lite/opencl/cl_test.cc b/paddle/fluid/lite/opencl/cl_test.cc index 0d3d9d7a389..4a4ac965c11 100644 --- a/paddle/fluid/lite/opencl/cl_test.cc +++ b/paddle/fluid/lite/opencl/cl_test.cc @@ -12,10 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include +#include +#include +#include +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/opencl/cl_caller.h" #include "paddle/fluid/lite/opencl/cl_context.h" #include "paddle/fluid/lite/opencl/cl_engine.h" +#include "paddle/fluid/lite/opencl/cl_helper.h" +#include "paddle/fluid/lite/opencl/cl_image.h" + +DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path."); namespace paddle { namespace lite { @@ -23,7 +33,7 @@ namespace lite { TEST(cl_test, engine_test) { auto* engine = CLEngine::Global(); CHECK(engine->IsInitSuccess()); - engine->set_cl_path("/data/local/tmp/opencl"); + engine->set_cl_path(FLAGS_cl_path); engine->platform(); engine->device(); engine->command_queue(); @@ -37,11 +47,108 @@ TEST(cl_test, engine_test) { TEST(cl_test, context_test) { auto* engine = CLEngine::Global(); CHECK(engine->IsInitSuccess()); - engine->set_cl_path("/data/local/tmp/opencl"); + engine->set_cl_path(FLAGS_cl_path); CLContext context; context.GetKernel("pool_max", "pool_kernel.cl", ""); context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", ""); context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", ""); } + +TEST(cl_test, kernel_test) { + auto* engine = CLEngine::Global(); + CHECK(engine->IsInitSuccess()); + engine->set_cl_path(FLAGS_cl_path); + std::unique_ptr context(new CLContext); + // std::unique_ptr helper(new CLHelper(context.get())); + std::unique_ptr helper(new CLHelper); + helper->set_context(context.get()); + helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl"); + helper->AddKernel("pool_max", "pool_kernel.cl"); + helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl"); + auto kernel = helper->KernelAt(2); + + std::unique_ptr in_data(new float[1024 * 512]); + for (int i = 0; i < 1024 * 512; i++) { + in_data[i] = 1.f; + } + const DDim in_dim = DDim(std::vector{1024, 512}); + CLImage in_image; + in_image.set_tensor_data(in_data.get(), in_dim); + in_image.InitNormalCLImage(helper->OpenCLContext()); + LOG(INFO) << in_image; + + std::unique_ptr bias_data(new float[1024 * 512]); + for (int i = 0; i < 1024 * 512; i++) { + bias_data[i] = 2.f; + } + const DDim bias_dim = DDim(std::vector{1024, 512}); + CLImage bias_image; + bias_image.set_tensor_data(bias_data.get(), bias_dim); + bias_image.InitNormalCLImage(helper->OpenCLContext()); + LOG(INFO) << bias_image; + + CLImage out_image; + const DDim out_dim = DDim(std::vector{1024, 512}); + out_image.InitEmptyImage(helper->OpenCLContext(), out_dim); + LOG(INFO) << out_image; + + cl_int status; + status = kernel.setArg(0, *in_image.cl_image()); + CL_CHECK_ERRORS(status); + status = kernel.setArg(1, *bias_image.cl_image()); + CL_CHECK_ERRORS(status); + status = kernel.setArg(2, *out_image.cl_image()); + CL_CHECK_ERRORS(status); + + // auto global_work_size = helper->DefaultWorkSize(out_image); + size_t width = in_image.ImageWidth(); + size_t height = in_image.ImageHeight(); + auto global_work_size = cl::NDRange{width, height}; + cl::Event event; + status = helper->OpenCLCommandQueue().enqueueNDRangeKernel( + kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event); + CL_CHECK_ERRORS(status); + + double start_nanos = event.getProfilingInfo(); + double stop_nanos = event.getProfilingInfo(); + double elapsed_micros = (stop_nanos - start_nanos) / 1000.0; + LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us."; + LOG(INFO) << out_image; +} + +TEST(cl_test, elementwise_add_test) { + std::default_random_engine engine; + std::uniform_real_distribution dist(-5, 5); + + const DDim in_dim = DDim(std::vector{1024, 512}); + std::unique_ptr in_data(new float[1024 * 512]); + for (int i = 0; i < 1024 * 512; i++) { + in_data[i] = dist(engine); + } + + const DDim bias_dim = DDim(std::vector{1024, 512}); + std::unique_ptr bias_data(new float[1024 * 512]); + for (int i = 0; i < 1024 * 512; i++) { + bias_data[i] = dist(engine); + } + + const DDim out_dim = DDim(std::vector{1024, 512}); + std::unique_ptr out(new float[1024 * 512]); + + bool status = InitOpenCLEngine(FLAGS_cl_path); + CHECK(status) << "Fail to initialize OpenCL engine."; + CLContext context; + + elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim, + out.get(), out_dim); + + int stride = 1024 * 512 / 20; + for (int i = 0; i < 1024 * 512; i += stride) { + std::cout << out[i] << " "; + } + + std::cout << std::endl; +} + } // namespace lite } // namespace paddle -- GitLab