提交 a36284ca 编写于 作者: Z ZhenWang

add cl_caller.

上级 34a290c8
无相关合并请求
......@@ -7,7 +7,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
set_target_properties(opencl-lib
PROPERTIES
IMPORTED_LOCATION
${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libOpenCL.so)
#${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libOpenCL.so)
${CMAKE_SOURCE_DIR}/opencl-lib/armeabi-v7a/libGLES_mali.so)
cc_library(cl_tool SRCS cl_tool.cc)
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
......@@ -18,7 +19,8 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context)
cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller)
target_link_libraries(test_cl_runtime opencl-lib)
add_dependencies(cl_tool opencl_clhpp)
endif()
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <array>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
static void CopyImageData(const CLImage& cl_image, float* out) {
int width = cl_image.image_dims()[0];
int height = cl_image.image_dims()[1];
half_t* image_data = new half_t[height * width * 4];
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
auto* converter = cl_image.image_converter();
converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
cl_image.tensor_dims());
delete[] image_data;
}
// Points the global CLEngine at the kernel directory `cl_path` and reports
// the engine's IsInitSuccess() result.
bool InitOpenCLEngine(std::string cl_path) {
  auto* const global_engine = CLEngine::Global();
  global_engine->set_cl_path(cl_path);
  const bool init_ok = global_engine->IsInitSuccess();
  return init_ok;
}
// Runs the "elementwise_add" OpenCL kernel on `in` and `bias` and copies the
// result back into `out`.
//
// context  - OpenCL context wrapper used by CLHelper to obtain the kernel,
//            the cl::Context and the command queue.
// in       - host input data, interpreted with shape in_dim.
// bias     - host bias data, interpreted with shape bias_dim.
// out      - host buffer that receives the result read back from the output
//            image created with shape out_dim.
//
// NOTE(review): the global work size is derived from the input image only;
// this presumably assumes in_dim, bias_dim and out_dim describe compatible
// shapes -- confirm with callers.
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
                     float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim) {
  CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  // KernelAt returns a reference into the helper's kernel list; bind it by
  // reference instead of copying the cl::Kernel handle.
  auto& kernel = helper.KernelAt(0);

  // Upload the two operands as images.
  CLImage in_image;
  in_image.set_tensor_data(in, in_dim);
  in_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Input image: " << in_image << " --- ";

  CLImage bias_image;
  bias_image.set_tensor_data(bias, bias_dim);
  bias_image.InitNormalCLImage(helper.OpenCLContext());
  VLOG(3) << " --- Bias image: " << bias_image << " --- ";

  // The output image carries no host data; it is only allocated.
  CLImage out_image;
  out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  // One work item per element of the input image's width x height grid.
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr);
  CL_CHECK_ERRORS(status);

  VLOG(3) << " --- Out image: " << out_image << " --- ";
  CopyImageData(out_image, out);
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace paddle {
namespace lite {
// Sets `cl_path` as the kernel path of the global CLEngine and returns the
// engine's IsInitSuccess() result.
bool InitOpenCLEngine(std::string cl_path);
// Runs the "elementwise_add" OpenCL kernel.
//   context  - OpenCL context wrapper used to obtain the kernel and queue.
//   in       - host input data with shape in_dim.
//   bias     - host bias data with shape bias_dim.
//   out      - host buffer receiving the result of the out_dim output image.
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
float* bias, const DDim& bias_dim, float* out,
const DDim& out_dim);
} // namespace lite
} // namespace paddle
......@@ -133,10 +133,10 @@ bool CLEngine::InitializePlatform() {
bool CLEngine::InitializeDevice() {
std::vector<cl::Device> all_devices;
status_ = platform_->getDevices(CL_DEVICE_TYPE_DEFAULT, &all_devices);
status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
CL_CHECK_ERRORS(status_);
if (all_devices.empty()) {
LOG(ERROR) << "No OpenCL device found!";
LOG(ERROR) << "No OpenCL GPU device found!";
return false;
}
device_ = std::make_shared<cl::Device>();
......
......@@ -21,9 +21,12 @@ limitations under the License. */
namespace paddle {
namespace lite {
// Stores the CLContext that the other CLHelper methods operate on.
void CLHelper::set_context(CLContext *context) {
  this->context_ = context;
}
void CLHelper::AddKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options) {
CHECK(context_ != nullptr) << "Please use set_context first!";
VLOG(3) << " --- begin to add kernel ---";
auto kernel = context_->GetKernel(kernel_name, file_name, options);
kernels.emplace_back(std::move(kernel));
......@@ -32,16 +35,24 @@ void CLHelper::AddKernel(const std::string &kernel_name,
// Returns the kernel stored at position `index` of this helper's kernel list.
// Fails the CHECKs when the index is out of range or the slot is null.
cl::Kernel &CLHelper::KernelAt(const int index) {
  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
  // A negative index converts to a very large size_t, so the same bound
  // check rejects it as well.
  CHECK(static_cast<size_t>(index) < kernels.size())
      << "The index must be less than the size of kernels.";
  CHECK(kernels[index] != nullptr)
      << "The target kernel pointer cannot be null.";
  cl::Kernel &selected = *(kernels[index]);
  return selected;
}
// Gives access to the command queue of the attached CLContext.
// A context must have been supplied beforehand, otherwise the CHECK fails.
cl::CommandQueue &CLHelper::OpenCLCommandQueue() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  cl::CommandQueue &queue = context_->GetCommandQueue();
  return queue;
}
// Gives access to the cl::Context of the attached CLContext.
// Only the null-checked definition is kept: the function was previously
// defined twice, and the first definition dereferenced context_ unchecked.
cl::Context &CLHelper::OpenCLContext() {
  CHECK(context_ != nullptr) << "Please use set_context first!";
  return context_->GetContext();
}
std::vector<size_t> CLHelper::DefaultWorkSize(const CLImage &image) {
cl::NDRange CLHelper::DefaultWorkSize(const CLImage &image) {
// n c h w
auto image_dim = image.tensor_dims();
if (image_dim.size() == 4) {
......@@ -52,23 +63,26 @@ std::vector<size_t> CLHelper::DefaultWorkSize(const CLImage &image) {
auto work_size_0 = image_width / w;
auto work_size_1 = w;
auto work_size_2 = n * h;
return {static_cast<size_t>(work_size_0), static_cast<size_t>(work_size_1),
return cl::NDRange{static_cast<size_t>(work_size_0),
static_cast<size_t>(work_size_1),
static_cast<size_t>(work_size_2)};
} else if (image_dim.size() == 2) {
return {static_cast<size_t>(1), static_cast<size_t>(image.ImageWidth()),
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(image.ImageHeight())};
} else if (image_dim.size() == 1) {
return {static_cast<size_t>(1), static_cast<size_t>(image.ImageWidth()),
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(1)};
} else if (image_dim.size() == 3) {
auto c = image_dim[0];
auto h = image_dim[1];
auto w = image_dim[2];
return {static_cast<size_t>((c + 3) / 4), static_cast<size_t>(w),
return cl::NDRange{static_cast<size_t>((c + 3) / 4), static_cast<size_t>(w),
static_cast<size_t>(h)};
} else {
LOG(FATAL) << "Not support this dimension, need to be implemented!";
return {};
return cl::NDRange{};
}
}
......
......@@ -30,6 +30,8 @@ class CLHelper {
explicit CLHelper(CLContext *context) : context_(context) {}
void set_context(CLContext *context);
void AddKernel(const std::string &kernel_name, const std::string &file_name,
const std::string &options = "");
......@@ -39,10 +41,10 @@ class CLHelper {
cl::Context &OpenCLContext();
std::vector<size_t> DefaultWorkSize(const CLImage &image);
cl::NDRange DefaultWorkSize(const CLImage &image);
private:
CLContext *context_;
CLContext *context_{nullptr};
std::vector<std::unique_ptr<cl::Kernel>> kernels;
};
......
......@@ -27,12 +27,12 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int height = cl_image.image_dims_[1];
half_t* image_data = new half_t[height * width * 4];
cl::Image2D& image = cl_image.cl_image();
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
float* tensor_data = new float[cl_image.numel()];
......@@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
return os;
}
void CLImage::SetTensorData(float* tensor_data, const DDim& dim) {
void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
auto numel = dim.product();
#else
......@@ -65,28 +65,30 @@ void CLImage::SetTensorData(float* tensor_data, const DDim& dim) {
}
// Initializes the image with a CLImageConverterFolder converter.
// Tensor data must already be set; a single CHECK enforces this. The earlier
// duplicate check named the old SetTensorData API, and the message of the
// second one was garbled by stray pasted text.
void CLImage::InitCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterFolder);
  InitCLImage(context, image_converter_.get());
}
// Initializes the image with a CLImageConverterNormal converter.
// Tensor data must already be set; the duplicate check that still named the
// old SetTensorData API has been removed.
void CLImage::InitNormalCLImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterNormal);
  InitCLImage(context, image_converter_.get());
}
// Initializes the image with a CLImageConverterNWBlock converter.
// Requires tensor data to be set and the tensor to have exactly 4 dims.
// The converter is allocated once; the duplicate stale-named check and the
// second allocation have been removed.
void CLImage::InitNImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterNWBlock);
  InitCLImage(context, image_converter_.get());
}
// Initializes the image with a CLImageConverterDWBlock converter.
// Requires tensor data to be set and the tensor to have exactly 4 dims.
// The converter is allocated once; the duplicate stale-named check and the
// second allocation have been removed.
void CLImage::InitDWImage(const cl::Context& context) {
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
  image_converter_.reset(new CLImageConverterDWBlock);
  InitCLImage(context, image_converter_.get());
}
......@@ -95,7 +97,7 @@ void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
<< " Empty image tensor data shouldn't have value";
tensor_dims_ = dim;
image_converter_.reset(new CLImageConverterNormal());
image_converter_.reset(new CLImageConverterNormal);
VLOG(3) << " to get image dims ";
image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
......@@ -123,7 +125,7 @@ void CLImage::InitEmptyWithImageDim(const cl::Context& context,
void CLImage::InitCLImage(const cl::Context& context,
CLImageConverterBase* converter) {
CHECK(tensor_data_ != nullptr) << " Please call SetTensorData first!";
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
VLOG(3) << " begin init cl image ";
image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
......
......@@ -33,11 +33,11 @@ class CLImage {
/*
* Will not hold input tensor data, memcpy in this method.
* */
void SetTensorData(float* tensor_data, const DDim& dim);
void set_tensor_data(float* tensor_data, const DDim& dim);
bool IsInit() { return initialized_; }
/*
* set_tensor_data must be called first.
* Folder when one dim or two dim.
* */
void InitCLImage(const cl::Context& context);
......@@ -53,7 +53,7 @@ class CLImage {
void InitEmptyWithImageDim(const cl::Context& context,
const DDim& image_dims);
cl::Image2D& cl_image() const { return *cl_image_; }
cl::Image* cl_image() const { return cl_image_.get(); }
const DDim& image_dims() const { return image_dims_; }
......@@ -63,7 +63,7 @@ class CLImage {
const DDim& tensor_dims() const { return tensor_dims_; }
/*
* Resize original tensor dim.
* */
inline CLImage& Resize(const DDim& dims) {
......
......@@ -12,10 +12,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace paddle {
namespace lite {
......@@ -23,7 +33,7 @@ namespace lite {
TEST(cl_test, engine_test) {
auto* engine = CLEngine::Global();
CHECK(engine->IsInitSuccess());
engine->set_cl_path("/data/local/tmp/opencl");
engine->set_cl_path(FLAGS_cl_path);
engine->platform();
engine->device();
engine->command_queue();
......@@ -37,11 +47,108 @@ TEST(cl_test, engine_test) {
// Requests kernels through a CLContext, asking for "elementwise_add" twice
// in a row.
TEST(cl_test, context_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  // The kernel directory comes from the cl_path flag only; the hard-coded
  // path that used to be set first was immediately overridden by this call.
  engine->set_cl_path(FLAGS_cl_path);
  CLContext context;
  context.GetKernel("pool_max", "pool_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
  context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
}
// Drives the "elementwise_add" kernel directly through CLHelper on two
// 1024 x 512 inputs (all 1.f and all 2.f), logging the images and the
// kernel execution time reported by the event.
TEST(cl_test, kernel_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);

  std::unique_ptr<CLContext> context(new CLContext);
  // std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
  std::unique_ptr<CLHelper> helper(new CLHelper);
  helper->set_context(context.get());
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  helper->AddKernel("pool_max", "pool_kernel.cl");
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  // KernelAt returns a reference into the helper's kernel list; bind it by
  // reference instead of copying the cl::Kernel handle.
  auto& kernel = helper->KernelAt(2);

  const int kNumel = 1024 * 512;

  std::unique_ptr<float[]> in_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    in_data[i] = 1.f;
  }
  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage in_image;
  in_image.set_tensor_data(in_data.get(), in_dim);
  in_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << in_image;

  std::unique_ptr<float[]> bias_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    bias_data[i] = 2.f;
  }
  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage bias_image;
  bias_image.set_tensor_data(bias_data.get(), bias_dim);
  bias_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << bias_image;

  CLImage out_image;
  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
  LOG(INFO) << out_image;

  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);

  // auto global_work_size = helper->DefaultWorkSize(out_image);
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  auto global_work_size = cl::NDRange{width, height};
  cl::Event event;
  status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event);
  CL_CHECK_ERRORS(status);

  // Profiling counters are only available once the command has completed,
  // so block on the event before querying them.
  // NOTE(review): profiling also needs a queue created with
  // CL_QUEUE_PROFILING_ENABLE; queue creation is not visible here -- confirm.
  status = event.wait();
  CL_CHECK_ERRORS(status);

  double start_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  double stop_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
  double elapsed_micros = (stop_nanos - start_nanos) / 1000.0;
  LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us.";
  LOG(INFO) << out_image;
}
// Runs elementwise_add() on two random 1024 x 512 inputs and verifies the
// device result against the host-side sum.
TEST(cl_test, elementwise_add_test) {
  std::default_random_engine engine;
  std::uniform_real_distribution<float> dist(-5, 5);

  const int kNumel = 1024 * 512;

  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> in_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    in_data[i] = dist(engine);
  }

  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> bias_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    bias_data[i] = dist(engine);
  }

  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> out(new float[kNumel]);

  bool status = InitOpenCLEngine(FLAGS_cl_path);
  CHECK(status) << "Fail to initialize OpenCL engine.";
  CLContext context;

  elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
                  out.get(), out_dim);

  // Print a small sample of the output for manual inspection.
  int stride = kNumel / 20;
  for (int i = 0; i < kNumel; i += stride) {
    std::cout << out[i] << " ";
  }
  std::cout << std::endl;

  // Compare every element with the host reference. Image data is read back
  // as half_t, so the comparison uses a loose absolute tolerance.
  // NOTE(review): confirm that 5e-2 suits the target devices.
  // The negated comparison also counts NaN results as mismatches.
  const float kTolerance = 5e-2f;
  int mismatches = 0;
  for (int i = 0; i < kNumel; i++) {
    const float expected = in_data[i] + bias_data[i];
    const float diff = out[i] - expected;
    const float abs_diff = diff < 0.f ? -diff : diff;
    if (!(abs_diff <= kTolerance)) {
      ++mismatches;
    }
  }
  EXPECT_EQ(mismatches, 0)
      << "Device output differs from in + bias beyond the tolerance.";
}
} // namespace lite
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部