提交 76f4b1fc 编写于 作者: C Chunwei

Init OpenCL support for lite framework.

...@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF) ...@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON) option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
...@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) ...@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(ccache) # set ccache for compilation include(ccache) # set ccache for compilation
# for opencl
if (LITE_WITH_OPENCL)
include(external/opencl-headers)
include(external/opencl-clhpp)
endif()
include(generic) # simplify cmake module include(generic) # simplify cmake module
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
......
...@@ -176,6 +176,10 @@ if (LITE_WITH_ARM) ...@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM") add_definitions("-DLITE_WITH_ARM")
endif() endif()
if (LITE_WITH_OPENCL)
add_definitions("-DLITE_WITH_OPENCL")
endif()
if (LITE_WITH_PROFILE) if (LITE_WITH_PROFILE)
add_definitions("-DLITE_WITH_PROFILE") add_definitions("-DLITE_WITH_PROFILE")
endif() endif()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(OPENCL_CLHPP_SRCS_DIR ${THIRD_PARTY_PATH}/opencl-clhpp)
SET(OPENCL_CLHPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencl-clhpp)
SET(OPENCL_CLHPP_INCLUDE_DIR "${OPENCL_CLHPP_INSTALL_DIR}" CACHE PATH "opencl-clhpp include directory." FORCE)
INCLUDE_DIRECTORIES(${OPENCL_CLHPP_INCLUDE_DIR})
ExternalProject_Add(
opencl_clhpp
GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-CLHPP.git"
GIT_TAG "v2.0.10"
PREFIX "${OPENCL_CLHPP_SRCS_DIR}"
CMAKE_ARGS -DBUILD_DOCS=OFF
-DBUILD_EXAMPLES=OFF
-DBUILD_TESTS=OFF
-DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)
ADD_DEPENDENCIES(opencl_clhpp opencl_headers)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
SET(OPENCL_HEADERS_SRCS_DIR ${THIRD_PARTY_PATH}/opencl-headers)
SET(OPENCL_HEADERS_INCLUDE_DIR "${OPENCL_HEADERS_SRCS_DIR}/src/opencl_headers" CACHE PATH "opencl-headers include directory." FORCE)
INCLUDE_DIRECTORIES(${OPENCL_HEADERS_INCLUDE_DIR})
ExternalProject_Add(
opencl_headers
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git"
GIT_TAG "c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
PREFIX ${OPENCL_HEADERS_SRCS_DIR}
DOWNLOAD_NAME "OpenCL-Headers"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
...@@ -182,6 +182,7 @@ add_subdirectory(x86) ...@@ -182,6 +182,7 @@ add_subdirectory(x86)
add_subdirectory(arm) add_subdirectory(arm)
add_subdirectory(host) add_subdirectory(host)
add_subdirectory(cuda) add_subdirectory(cuda)
add_subdirectory(opencl)
add_subdirectory(model_parser) add_subdirectory(model_parser)
add_subdirectory(utils) add_subdirectory(utils)
add_subdirectory(api) add_subdirectory(api)
......
if (NOT LITE_WITH_OPENCL)
return()
endif()
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(cl_wrapper SRCS cl_wrapper.cc)
cc_library(cl_tool SRCS cl_tool.cc)
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
cc_library(cl_half SRCS cl_half.cc)
target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
add_dependencies(cl_tool opencl_clhpp)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#include <CL/cl2.hpp>
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
static void CopyImageData(const CLImage& cl_image, float* out) {
int width = cl_image.image_dims()[0];
int height = cl_image.image_dims()[1];
half_t* image_data = new half_t[height * width * 4];
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
auto* converter = cl_image.image_converter();
converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
cl_image.tensor_dims());
delete[] image_data;
}
bool InitOpenCLEngine(std::string cl_path) {
auto* engine = CLEngine::Global();
engine->set_cl_path(cl_path);
return engine->IsInitSuccess();
}
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
float* bias, const DDim& bias_dim, float* out,
const DDim& out_dim) {
CLHelper helper(context);
helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
auto kernel = helper.KernelAt(0);
CLImage in_image;
in_image.set_tensor_data(in, in_dim);
in_image.InitNormalCLImage(helper.OpenCLContext());
VLOG(3) << " --- Inpu image: " << in_image << " --- ";
CLImage bias_image;
bias_image.set_tensor_data(bias, bias_dim);
bias_image.InitNormalCLImage(helper.OpenCLContext());
VLOG(3) << " --- Bias image: " << bias_image << " --- ";
CLImage out_image;
out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);
cl_int status;
status = kernel.setArg(0, *in_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(1, *bias_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(2, *out_image.cl_image());
CL_CHECK_ERRORS(status);
size_t width = in_image.ImageWidth();
size_t height = in_image.ImageHeight();
auto global_work_size = cl::NDRange{width, height};
status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr);
CL_CHECK_ERRORS(status);
VLOG(3) << " --- Out image: " << out_image << " --- ";
CopyImageData(out_image, out);
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace paddle {
namespace lite {
bool InitOpenCLEngine(std::string cl_path);
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
float* bias, const DDim& bias_dim, float* out,
const DDim& out_dim);
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
cl::CommandQueue &CLContext::GetCommandQueue() {
return CLEngine::Global()->command_queue();
}
cl::Context &CLContext::GetContext() { return CLEngine::Global()->context(); }
cl::Program &CLContext::GetProgram(const std::string &file_name,
const std::string &options) {
std::string program_key = file_name;
if (!options.empty()) {
program_key += options;
}
auto it = programs_.find(program_key);
if (it != programs_.end()) {
VLOG(3) << " --- program -> " << program_key << " has been built --- ";
return *(it->second);
}
auto program = CLEngine::Global()->CreateProgram(
GetContext(), CLEngine::Global()->cl_path() + "/cl_kernel/" + file_name);
VLOG(3) << " --- begin build program -> " << program_key << " --- ";
CLEngine::Global()->BuildProgram(program.get(), options);
VLOG(3) << " --- end build program -> " << program_key << " --- ";
programs_[program_key] = std::move(program);
return *(programs_[program_key]);
}
std::unique_ptr<cl::Kernel> CLContext::GetKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options) {
cl_int status{CL_SUCCESS};
VLOG(3) << " --- to get program " << file_name << " --- ";
auto program = GetProgram(file_name, options);
VLOG(3) << " --- end get program --- ";
VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
std::unique_ptr<cl::Kernel> kernel(
new cl::Kernel(program, kernel_name.c_str(), &status));
CL_CHECK_ERRORS(status);
VLOG(3) << " --- end create kernel --- ";
return std::move(kernel);
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {
class CLContext {
public:
cl::CommandQueue &GetCommandQueue();
cl::Context &GetContext();
cl::Program &GetProgram(const std::string &file_name,
const std::string &options);
std::unique_ptr<cl::Kernel> GetKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options);
private:
std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
CLEngine* CLEngine::Global() {
static CLEngine cl_engine_;
cl_engine_.Init();
return &cl_engine_;
}
CLEngine::~CLEngine() {
if (command_queue_ != nullptr) {
command_queue_->finish();
}
// For controlling the destruction order:
command_queue_.reset();
context_.reset();
device_.reset();
platform_.reset();
}
bool CLEngine::Init() {
if (initialized_) {
return true;
}
bool is_platform_init = InitializePlatform();
bool is_device_init = InitializeDevice();
is_init_success_ = is_platform_init && is_device_init;
initialized_ = true;
return initialized_;
}
cl::Platform& CLEngine::platform() {
CHECK(platform_ != nullptr) << "platform_ is not initialized!";
return *platform_;
}
cl::Context& CLEngine::context() {
if (context_ == nullptr) {
context_ = CreateContext();
}
return *context_;
}
cl::Device& CLEngine::device() {
CHECK(device_ != nullptr) << "device_ is not initialized!";
return *device_;
}
cl::CommandQueue& CLEngine::command_queue() {
if (command_queue_ == nullptr) {
command_queue_ = CreateCommandQueue(context());
}
return *command_queue_;
}
std::unique_ptr<cl::Program> CLEngine::CreateProgram(const cl::Context& context,
std::string file_name) {
std::ifstream file{file_name, std::ios::binary | std::ios::ate};
CHECK(file.is_open()) << "Can't open file from " << file_name;
auto size = file.tellg();
CHECK(size > 0) << "size is too small.";
std::string content(size, '\0');
file.seekg(0);
file.read(&content[0], size);
cl::Program::Sources sources;
sources.push_back(content);
auto prog =
std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
LOG(INFO) << "OpenCL kernel file name: " << file_name;
LOG(INFO) << "Program source size: " << content.size();
CL_CHECK_ERRORS(status_);
return std::move(prog);
}
std::unique_ptr<cl::UserEvent> CLEngine::CreateEvent(
const cl::Context& context) {
auto event =
std::unique_ptr<cl::UserEvent>(new cl::UserEvent(context, &status_));
CL_CHECK_ERRORS(status_);
return std::move(event);
}
bool CLEngine::BuildProgram(cl::Program* program, const std::string& options) {
std::string build_option = options + " -cl-fast-relaxed-math -I " +
CLEngine::Global()->cl_path() + "/cl_kernel";
status_ = program->build({*device_}, build_option.c_str());
CL_CHECK_ERRORS(status_);
if (status_ != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
CL_BUILD_ERROR) {
std::string log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
LOG(INFO) << "Program build error: " << log;
}
return false;
}
return true;
}
bool CLEngine::InitializePlatform() {
std::vector<cl::Platform> all_platforms;
status_ = cl::Platform::get(&all_platforms);
CL_CHECK_ERRORS(status_);
if (all_platforms.empty()) {
LOG(ERROR) << "No OpenCL platform found!";
return false;
}
platform_ = std::make_shared<cl::Platform>();
*platform_ = all_platforms[0];
return true;
}
bool CLEngine::InitializeDevice() {
std::vector<cl::Device> all_devices;
status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
CL_CHECK_ERRORS(status_);
if (all_devices.empty()) {
LOG(ERROR) << "No OpenCL GPU device found!";
return false;
}
device_ = std::make_shared<cl::Device>();
*device_ = all_devices[0];
auto device_name = device_->getInfo<CL_DEVICE_NAME>();
LOG(INFO) << "Using device: " << device_name;
auto image_support = device_->getInfo<CL_DEVICE_IMAGE_SUPPORT>();
if (image_support) {
LOG(INFO) << "The chosen device supports image processing.";
} else {
LOG(ERROR) << "The chosen device doesn't support image processing!";
return false;
}
auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
LOG(INFO) << "The extensions supported by this device: " << ext_data;
if (ext_data.find("cl_khr_fp16") != std::string::npos) {
LOG(INFO) << "The chosen device supports the half data type.";
} else {
LOG(ERROR) << "The chosen device doesn't support the half data type!";
return false;
}
auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
LOG(INFO) << "The chosen device has " << max_units << " compute units.";
auto local_mem = device_->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
LOG(INFO) << "The local memory size of the chosen device is "
<< static_cast<float>(local_mem) / 1024 << " KB.";
return true;
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
class CLEngine {
public:
static CLEngine* Global();
bool Init();
cl::Platform& platform();
cl::Context& context();
cl::Device& device();
cl::CommandQueue& command_queue();
std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
std::string file_name);
std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);
bool BuildProgram(cl::Program* program, const std::string& options = "");
bool IsInitSuccess() { return is_init_success_; }
std::string cl_path() { return cl_path_; }
void set_cl_path(std::string cl_path) { cl_path_ = cl_path; }
private:
CLEngine() = default;
~CLEngine();
bool InitializePlatform();
bool InitializeDevice();
std::shared_ptr<cl::Context> CreateContext() {
auto context = std::make_shared<cl::Context>(
std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr, &status_);
CL_CHECK_ERRORS(status_);
return context;
}
std::shared_ptr<cl::CommandQueue> CreateCommandQueue(
const cl::Context& context) {
auto queue =
std::make_shared<cl::CommandQueue>(context, device(), 0, &status_);
CL_CHECK_ERRORS(status_);
return queue;
}
std::string cl_path_;
std::shared_ptr<cl::Platform> platform_{nullptr};
std::shared_ptr<cl::Context> context_{nullptr};
std::shared_ptr<cl::Device> device_{nullptr};
std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
cl_int status_{CL_SUCCESS};
bool initialized_{false};
bool is_init_success_{false};
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
// ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace paddle {
namespace lite {
static const uint32_t mantissatable[2048] = {
0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34a00000,
0x34c00000, 0x34e00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000,
0x35400000, 0x35500000, 0x35600000, 0x35700000, 0x35800000, 0x35880000,
0x35900000, 0x35980000, 0x35a00000, 0x35a80000, 0x35b00000, 0x35b80000,
0x35c00000, 0x35c80000, 0x35d00000, 0x35d80000, 0x35e00000, 0x35e80000,
0x35f00000, 0x35f80000, 0x36000000, 0x36040000, 0x36080000, 0x360c0000,
0x36100000, 0x36140000, 0x36180000, 0x361c0000, 0x36200000, 0x36240000,
0x36280000, 0x362c0000, 0x36300000, 0x36340000, 0x36380000, 0x363c0000,
0x36400000, 0x36440000, 0x36480000, 0x364c0000, 0x36500000, 0x36540000,
0x36580000, 0x365c0000, 0x36600000, 0x36640000, 0x36680000, 0x366c0000,
0x36700000, 0x36740000, 0x36780000, 0x367c0000, 0x36800000, 0x36820000,
0x36840000, 0x36860000, 0x36880000, 0x368a0000, 0x368c0000, 0x368e0000,
0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369a0000,
0x369c0000, 0x369e0000, 0x36a00000, 0x36a20000, 0x36a40000, 0x36a60000,
0x36a80000, 0x36aa0000, 0x36ac0000, 0x36ae0000, 0x36b00000, 0x36b20000,
0x36b40000, 0x36b60000, 0x36b80000, 0x36ba0000, 0x36bc0000, 0x36be0000,
0x36c00000, 0x36c20000, 0x36c40000, 0x36c60000, 0x36c80000, 0x36ca0000,
0x36cc0000, 0x36ce0000, 0x36d00000, 0x36d20000, 0x36d40000, 0x36d60000,
0x36d80000, 0x36da0000, 0x36dc0000, 0x36de0000, 0x36e00000, 0x36e20000,
0x36e40000, 0x36e60000, 0x36e80000, 0x36ea0000, 0x36ec0000, 0x36ee0000,
0x36f00000, 0x36f20000, 0x36f40000, 0x36f60000, 0x36f80000, 0x36fa0000,
0x36fc0000, 0x36fe0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000,
0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000,
0x370a0000, 0x370b0000, 0x370c0000, 0x370d0000, 0x370e0000, 0x370f0000,
0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000,
0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371a0000, 0x371b0000,
0x371c0000, 0x371d0000, 0x371e0000, 0x371f0000, 0x37200000, 0x37210000,
0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000,
0x37280000, 0x37290000, 0x372a0000, 0x372b0000, 0x372c0000, 0x372d0000,
0x372e0000, 0x372f0000, 0x37300000, 0x37310000, 0x37320000, 0x37330000,
0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000,
0x373a0000, 0x373b0000, 0x373c0000, 0x373d0000, 0x373e0000, 0x373f0000,
0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000,
0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374a0000, 0x374b0000,
0x374c0000, 0x374d0000, 0x374e0000, 0x374f0000, 0x37500000, 0x37510000,
0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000,
0x37580000, 0x37590000, 0x375a0000, 0x375b0000, 0x375c0000, 0x375d0000,
0x375e0000, 0x375f0000, 0x37600000, 0x37610000, 0x37620000, 0x37630000,
0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000,
0x376a0000, 0x376b0000, 0x376c0000, 0x376d0000, 0x376e0000, 0x376f0000,
0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000,
0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377a0000, 0x377b0000,
0x377c0000, 0x377d0000, 0x377e0000, 0x377f0000, 0x37800000, 0x37808000,
0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000,
0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000,
0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000,
0x378a0000, 0x378a8000, 0x378b0000, 0x378b8000, 0x378c0000, 0x378c8000,
0x378d0000, 0x378d8000, 0x378e0000, 0x378e8000, 0x378f0000, 0x378f8000,
0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000,
0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000,
0x37960000, 0x37968000, 0x37970000, 0x37978000, 0x37980000, 0x37988000,
0x37990000, 0x37998000, 0x379a0000, 0x379a8000, 0x379b0000, 0x379b8000,
0x379c0000, 0x379c8000, 0x379d0000, 0x379d8000, 0x379e0000, 0x379e8000,
0x379f0000, 0x379f8000, 0x37a00000, 0x37a08000, 0x37a10000, 0x37a18000,
0x37a20000, 0x37a28000, 0x37a30000, 0x37a38000, 0x37a40000, 0x37a48000,
0x37a50000, 0x37a58000, 0x37a60000, 0x37a68000, 0x37a70000, 0x37a78000,
0x37a80000, 0x37a88000, 0x37a90000, 0x37a98000, 0x37aa0000, 0x37aa8000,
0x37ab0000, 0x37ab8000, 0x37ac0000, 0x37ac8000, 0x37ad0000, 0x37ad8000,
0x37ae0000, 0x37ae8000, 0x37af0000, 0x37af8000, 0x37b00000, 0x37b08000,
0x37b10000, 0x37b18000, 0x37b20000, 0x37b28000, 0x37b30000, 0x37b38000,
0x37b40000, 0x37b48000, 0x37b50000, 0x37b58000, 0x37b60000, 0x37b68000,
0x37b70000, 0x37b78000, 0x37b80000, 0x37b88000, 0x37b90000, 0x37b98000,
0x37ba0000, 0x37ba8000, 0x37bb0000, 0x37bb8000, 0x37bc0000, 0x37bc8000,
0x37bd0000, 0x37bd8000, 0x37be0000, 0x37be8000, 0x37bf0000, 0x37bf8000,
0x37c00000, 0x37c08000, 0x37c10000, 0x37c18000, 0x37c20000, 0x37c28000,
0x37c30000, 0x37c38000, 0x37c40000, 0x37c48000, 0x37c50000, 0x37c58000,
0x37c60000, 0x37c68000, 0x37c70000, 0x37c78000, 0x37c80000, 0x37c88000,
0x37c90000, 0x37c98000, 0x37ca0000, 0x37ca8000, 0x37cb0000, 0x37cb8000,
0x37cc0000, 0x37cc8000, 0x37cd0000, 0x37cd8000, 0x37ce0000, 0x37ce8000,
0x37cf0000, 0x37cf8000, 0x37d00000, 0x37d08000, 0x37d10000, 0x37d18000,
0x37d20000, 0x37d28000, 0x37d30000, 0x37d38000, 0x37d40000, 0x37d48000,
0x37d50000, 0x37d58000, 0x37d60000, 0x37d68000, 0x37d70000, 0x37d78000,
0x37d80000, 0x37d88000, 0x37d90000, 0x37d98000, 0x37da0000, 0x37da8000,
0x37db0000, 0x37db8000, 0x37dc0000, 0x37dc8000, 0x37dd0000, 0x37dd8000,
0x37de0000, 0x37de8000, 0x37df0000, 0x37df8000, 0x37e00000, 0x37e08000,
0x37e10000, 0x37e18000, 0x37e20000, 0x37e28000, 0x37e30000, 0x37e38000,
0x37e40000, 0x37e48000, 0x37e50000, 0x37e58000, 0x37e60000, 0x37e68000,
0x37e70000, 0x37e78000, 0x37e80000, 0x37e88000, 0x37e90000, 0x37e98000,
0x37ea0000, 0x37ea8000, 0x37eb0000, 0x37eb8000, 0x37ec0000, 0x37ec8000,
0x37ed0000, 0x37ed8000, 0x37ee0000, 0x37ee8000, 0x37ef0000, 0x37ef8000,
0x37f00000, 0x37f08000, 0x37f10000, 0x37f18000, 0x37f20000, 0x37f28000,
0x37f30000, 0x37f38000, 0x37f40000, 0x37f48000, 0x37f50000, 0x37f58000,
0x37f60000, 0x37f68000, 0x37f70000, 0x37f78000, 0x37f80000, 0x37f88000,
0x37f90000, 0x37f98000, 0x37fa0000, 0x37fa8000, 0x37fb0000, 0x37fb8000,
0x37fc0000, 0x37fc8000, 0x37fd0000, 0x37fd8000, 0x37fe0000, 0x37fe8000,
0x37ff0000, 0x37ff8000, 0x38000000, 0x38004000, 0x38008000, 0x3800c000,
0x38010000, 0x38014000, 0x38018000, 0x3801c000, 0x38020000, 0x38024000,
0x38028000, 0x3802c000, 0x38030000, 0x38034000, 0x38038000, 0x3803c000,
0x38040000, 0x38044000, 0x38048000, 0x3804c000, 0x38050000, 0x38054000,
0x38058000, 0x3805c000, 0x38060000, 0x38064000, 0x38068000, 0x3806c000,
0x38070000, 0x38074000, 0x38078000, 0x3807c000, 0x38080000, 0x38084000,
0x38088000, 0x3808c000, 0x38090000, 0x38094000, 0x38098000, 0x3809c000,
0x380a0000, 0x380a4000, 0x380a8000, 0x380ac000, 0x380b0000, 0x380b4000,
0x380b8000, 0x380bc000, 0x380c0000, 0x380c4000, 0x380c8000, 0x380cc000,
0x380d0000, 0x380d4000, 0x380d8000, 0x380dc000, 0x380e0000, 0x380e4000,
0x380e8000, 0x380ec000, 0x380f0000, 0x380f4000, 0x380f8000, 0x380fc000,
0x38100000, 0x38104000, 0x38108000, 0x3810c000, 0x38110000, 0x38114000,
0x38118000, 0x3811c000, 0x38120000, 0x38124000, 0x38128000, 0x3812c000,
0x38130000, 0x38134000, 0x38138000, 0x3813c000, 0x38140000, 0x38144000,
0x38148000, 0x3814c000, 0x38150000, 0x38154000, 0x38158000, 0x3815c000,
0x38160000, 0x38164000, 0x38168000, 0x3816c000, 0x38170000, 0x38174000,
0x38178000, 0x3817c000, 0x38180000, 0x38184000, 0x38188000, 0x3818c000,
0x38190000, 0x38194000, 0x38198000, 0x3819c000, 0x381a0000, 0x381a4000,
0x381a8000, 0x381ac000, 0x381b0000, 0x381b4000, 0x381b8000, 0x381bc000,
0x381c0000, 0x381c4000, 0x381c8000, 0x381cc000, 0x381d0000, 0x381d4000,
0x381d8000, 0x381dc000, 0x381e0000, 0x381e4000, 0x381e8000, 0x381ec000,
0x381f0000, 0x381f4000, 0x381f8000, 0x381fc000, 0x38200000, 0x38204000,
0x38208000, 0x3820c000, 0x38210000, 0x38214000, 0x38218000, 0x3821c000,
0x38220000, 0x38224000, 0x38228000, 0x3822c000, 0x38230000, 0x38234000,
0x38238000, 0x3823c000, 0x38240000, 0x38244000, 0x38248000, 0x3824c000,
0x38250000, 0x38254000, 0x38258000, 0x3825c000, 0x38260000, 0x38264000,
0x38268000, 0x3826c000, 0x38270000, 0x38274000, 0x38278000, 0x3827c000,
0x38280000, 0x38284000, 0x38288000, 0x3828c000, 0x38290000, 0x38294000,
0x38298000, 0x3829c000, 0x382a0000, 0x382a4000, 0x382a8000, 0x382ac000,
0x382b0000, 0x382b4000, 0x382b8000, 0x382bc000, 0x382c0000, 0x382c4000,
0x382c8000, 0x382cc000, 0x382d0000, 0x382d4000, 0x382d8000, 0x382dc000,
0x382e0000, 0x382e4000, 0x382e8000, 0x382ec000, 0x382f0000, 0x382f4000,
0x382f8000, 0x382fc000, 0x38300000, 0x38304000, 0x38308000, 0x3830c000,
0x38310000, 0x38314000, 0x38318000, 0x3831c000, 0x38320000, 0x38324000,
0x38328000, 0x3832c000, 0x38330000, 0x38334000, 0x38338000, 0x3833c000,
0x38340000, 0x38344000, 0x38348000, 0x3834c000, 0x38350000, 0x38354000,
0x38358000, 0x3835c000, 0x38360000, 0x38364000, 0x38368000, 0x3836c000,
0x38370000, 0x38374000, 0x38378000, 0x3837c000, 0x38380000, 0x38384000,
0x38388000, 0x3838c000, 0x38390000, 0x38394000, 0x38398000, 0x3839c000,
0x383a0000, 0x383a4000, 0x383a8000, 0x383ac000, 0x383b0000, 0x383b4000,
0x383b8000, 0x383bc000, 0x383c0000, 0x383c4000, 0x383c8000, 0x383cc000,
0x383d0000, 0x383d4000, 0x383d8000, 0x383dc000, 0x383e0000, 0x383e4000,
0x383e8000, 0x383ec000, 0x383f0000, 0x383f4000, 0x383f8000, 0x383fc000,
0x38400000, 0x38404000, 0x38408000, 0x3840c000, 0x38410000, 0x38414000,
0x38418000, 0x3841c000, 0x38420000, 0x38424000, 0x38428000, 0x3842c000,
0x38430000, 0x38434000, 0x38438000, 0x3843c000, 0x38440000, 0x38444000,
0x38448000, 0x3844c000, 0x38450000, 0x38454000, 0x38458000, 0x3845c000,
0x38460000, 0x38464000, 0x38468000, 0x3846c000, 0x38470000, 0x38474000,
0x38478000, 0x3847c000, 0x38480000, 0x38484000, 0x38488000, 0x3848c000,
0x38490000, 0x38494000, 0x38498000, 0x3849c000, 0x384a0000, 0x384a4000,
0x384a8000, 0x384ac000, 0x384b0000, 0x384b4000, 0x384b8000, 0x384bc000,
0x384c0000, 0x384c4000, 0x384c8000, 0x384cc000, 0x384d0000, 0x384d4000,
0x384d8000, 0x384dc000, 0x384e0000, 0x384e4000, 0x384e8000, 0x384ec000,
0x384f0000, 0x384f4000, 0x384f8000, 0x384fc000, 0x38500000, 0x38504000,
0x38508000, 0x3850c000, 0x38510000, 0x38514000, 0x38518000, 0x3851c000,
0x38520000, 0x38524000, 0x38528000, 0x3852c000, 0x38530000, 0x38534000,
0x38538000, 0x3853c000, 0x38540000, 0x38544000, 0x38548000, 0x3854c000,
0x38550000, 0x38554000, 0x38558000, 0x3855c000, 0x38560000, 0x38564000,
0x38568000, 0x3856c000, 0x38570000, 0x38574000, 0x38578000, 0x3857c000,
0x38580000, 0x38584000, 0x38588000, 0x3858c000, 0x38590000, 0x38594000,
0x38598000, 0x3859c000, 0x385a0000, 0x385a4000, 0x385a8000, 0x385ac000,
0x385b0000, 0x385b4000, 0x385b8000, 0x385bc000, 0x385c0000, 0x385c4000,
0x385c8000, 0x385cc000, 0x385d0000, 0x385d4000, 0x385d8000, 0x385dc000,
0x385e0000, 0x385e4000, 0x385e8000, 0x385ec000, 0x385f0000, 0x385f4000,
0x385f8000, 0x385fc000, 0x38600000, 0x38604000, 0x38608000, 0x3860c000,
0x38610000, 0x38614000, 0x38618000, 0x3861c000, 0x38620000, 0x38624000,
0x38628000, 0x3862c000, 0x38630000, 0x38634000, 0x38638000, 0x3863c000,
0x38640000, 0x38644000, 0x38648000, 0x3864c000, 0x38650000, 0x38654000,
0x38658000, 0x3865c000, 0x38660000, 0x38664000, 0x38668000, 0x3866c000,
0x38670000, 0x38674000, 0x38678000, 0x3867c000, 0x38680000, 0x38684000,
0x38688000, 0x3868c000, 0x38690000, 0x38694000, 0x38698000, 0x3869c000,
0x386a0000, 0x386a4000, 0x386a8000, 0x386ac000, 0x386b0000, 0x386b4000,
0x386b8000, 0x386bc000, 0x386c0000, 0x386c4000, 0x386c8000, 0x386cc000,
0x386d0000, 0x386d4000, 0x386d8000, 0x386dc000, 0x386e0000, 0x386e4000,
0x386e8000, 0x386ec000, 0x386f0000, 0x386f4000, 0x386f8000, 0x386fc000,
0x38700000, 0x38704000, 0x38708000, 0x3870c000, 0x38710000, 0x38714000,
0x38718000, 0x3871c000, 0x38720000, 0x38724000, 0x38728000, 0x3872c000,
0x38730000, 0x38734000, 0x38738000, 0x3873c000, 0x38740000, 0x38744000,
0x38748000, 0x3874c000, 0x38750000, 0x38754000, 0x38758000, 0x3875c000,
0x38760000, 0x38764000, 0x38768000, 0x3876c000, 0x38770000, 0x38774000,
0x38778000, 0x3877c000, 0x38780000, 0x38784000, 0x38788000, 0x3878c000,
0x38790000, 0x38794000, 0x38798000, 0x3879c000, 0x387a0000, 0x387a4000,
0x387a8000, 0x387ac000, 0x387b0000, 0x387b4000, 0x387b8000, 0x387bc000,
0x387c0000, 0x387c4000, 0x387c8000, 0x387cc000, 0x387d0000, 0x387d4000,
0x387d8000, 0x387dc000, 0x387e0000, 0x387e4000, 0x387e8000, 0x387ec000,
0x387f0000, 0x387f4000, 0x387f8000, 0x387fc000, 0x38000000, 0x38002000,
0x38004000, 0x38006000, 0x38008000, 0x3800a000, 0x3800c000, 0x3800e000,
0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801a000,
0x3801c000, 0x3801e000, 0x38020000, 0x38022000, 0x38024000, 0x38026000,
0x38028000, 0x3802a000, 0x3802c000, 0x3802e000, 0x38030000, 0x38032000,
0x38034000, 0x38036000, 0x38038000, 0x3803a000, 0x3803c000, 0x3803e000,
0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804a000,
0x3804c000, 0x3804e000, 0x38050000, 0x38052000, 0x38054000, 0x38056000,
0x38058000, 0x3805a000, 0x3805c000, 0x3805e000, 0x38060000, 0x38062000,
0x38064000, 0x38066000, 0x38068000, 0x3806a000, 0x3806c000, 0x3806e000,
0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807a000,
0x3807c000, 0x3807e000, 0x38080000, 0x38082000, 0x38084000, 0x38086000,
0x38088000, 0x3808a000, 0x3808c000, 0x3808e000, 0x38090000, 0x38092000,
0x38094000, 0x38096000, 0x38098000, 0x3809a000, 0x3809c000, 0x3809e000,
0x380a0000, 0x380a2000, 0x380a4000, 0x380a6000, 0x380a8000, 0x380aa000,
0x380ac000, 0x380ae000, 0x380b0000, 0x380b2000, 0x380b4000, 0x380b6000,
0x380b8000, 0x380ba000, 0x380bc000, 0x380be000, 0x380c0000, 0x380c2000,
0x380c4000, 0x380c6000, 0x380c8000, 0x380ca000, 0x380cc000, 0x380ce000,
0x380d0000, 0x380d2000, 0x380d4000, 0x380d6000, 0x380d8000, 0x380da000,
0x380dc000, 0x380de000, 0x380e0000, 0x380e2000, 0x380e4000, 0x380e6000,
0x380e8000, 0x380ea000, 0x380ec000, 0x380ee000, 0x380f0000, 0x380f2000,
0x380f4000, 0x380f6000, 0x380f8000, 0x380fa000, 0x380fc000, 0x380fe000,
0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810a000,
0x3810c000, 0x3810e000, 0x38110000, 0x38112000, 0x38114000, 0x38116000,
0x38118000, 0x3811a000, 0x3811c000, 0x3811e000, 0x38120000, 0x38122000,
0x38124000, 0x38126000, 0x38128000, 0x3812a000, 0x3812c000, 0x3812e000,
0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813a000,
0x3813c000, 0x3813e000, 0x38140000, 0x38142000, 0x38144000, 0x38146000,
0x38148000, 0x3814a000, 0x3814c000, 0x3814e000, 0x38150000, 0x38152000,
0x38154000, 0x38156000, 0x38158000, 0x3815a000, 0x3815c000, 0x3815e000,
0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816a000,
0x3816c000, 0x3816e000, 0x38170000, 0x38172000, 0x38174000, 0x38176000,
0x38178000, 0x3817a000, 0x3817c000, 0x3817e000, 0x38180000, 0x38182000,
0x38184000, 0x38186000, 0x38188000, 0x3818a000, 0x3818c000, 0x3818e000,
0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819a000,
0x3819c000, 0x3819e000, 0x381a0000, 0x381a2000, 0x381a4000, 0x381a6000,
0x381a8000, 0x381aa000, 0x381ac000, 0x381ae000, 0x381b0000, 0x381b2000,
0x381b4000, 0x381b6000, 0x381b8000, 0x381ba000, 0x381bc000, 0x381be000,
0x381c0000, 0x381c2000, 0x381c4000, 0x381c6000, 0x381c8000, 0x381ca000,
0x381cc000, 0x381ce000, 0x381d0000, 0x381d2000, 0x381d4000, 0x381d6000,
0x381d8000, 0x381da000, 0x381dc000, 0x381de000, 0x381e0000, 0x381e2000,
0x381e4000, 0x381e6000, 0x381e8000, 0x381ea000, 0x381ec000, 0x381ee000,
0x381f0000, 0x381f2000, 0x381f4000, 0x381f6000, 0x381f8000, 0x381fa000,
0x381fc000, 0x381fe000, 0x38200000, 0x38202000, 0x38204000, 0x38206000,
0x38208000, 0x3820a000, 0x3820c000, 0x3820e000, 0x38210000, 0x38212000,
0x38214000, 0x38216000, 0x38218000, 0x3821a000, 0x3821c000, 0x3821e000,
0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822a000,
0x3822c000, 0x3822e000, 0x38230000, 0x38232000, 0x38234000, 0x38236000,
0x38238000, 0x3823a000, 0x3823c000, 0x3823e000, 0x38240000, 0x38242000,
0x38244000, 0x38246000, 0x38248000, 0x3824a000, 0x3824c000, 0x3824e000,
0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825a000,
0x3825c000, 0x3825e000, 0x38260000, 0x38262000, 0x38264000, 0x38266000,
0x38268000, 0x3826a000, 0x3826c000, 0x3826e000, 0x38270000, 0x38272000,
0x38274000, 0x38276000, 0x38278000, 0x3827a000, 0x3827c000, 0x3827e000,
0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828a000,
0x3828c000, 0x3828e000, 0x38290000, 0x38292000, 0x38294000, 0x38296000,
0x38298000, 0x3829a000, 0x3829c000, 0x3829e000, 0x382a0000, 0x382a2000,
0x382a4000, 0x382a6000, 0x382a8000, 0x382aa000, 0x382ac000, 0x382ae000,
0x382b0000, 0x382b2000, 0x382b4000, 0x382b6000, 0x382b8000, 0x382ba000,
0x382bc000, 0x382be000, 0x382c0000, 0x382c2000, 0x382c4000, 0x382c6000,
0x382c8000, 0x382ca000, 0x382cc000, 0x382ce000, 0x382d0000, 0x382d2000,
0x382d4000, 0x382d6000, 0x382d8000, 0x382da000, 0x382dc000, 0x382de000,
0x382e0000, 0x382e2000, 0x382e4000, 0x382e6000, 0x382e8000, 0x382ea000,
0x382ec000, 0x382ee000, 0x382f0000, 0x382f2000, 0x382f4000, 0x382f6000,
0x382f8000, 0x382fa000, 0x382fc000, 0x382fe000, 0x38300000, 0x38302000,
0x38304000, 0x38306000, 0x38308000, 0x3830a000, 0x3830c000, 0x3830e000,
0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831a000,
0x3831c000, 0x3831e000, 0x38320000, 0x38322000, 0x38324000, 0x38326000,
0x38328000, 0x3832a000, 0x3832c000, 0x3832e000, 0x38330000, 0x38332000,
0x38334000, 0x38336000, 0x38338000, 0x3833a000, 0x3833c000, 0x3833e000,
0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834a000,
0x3834c000, 0x3834e000, 0x38350000, 0x38352000, 0x38354000, 0x38356000,
0x38358000, 0x3835a000, 0x3835c000, 0x3835e000, 0x38360000, 0x38362000,
0x38364000, 0x38366000, 0x38368000, 0x3836a000, 0x3836c000, 0x3836e000,
0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837a000,
0x3837c000, 0x3837e000, 0x38380000, 0x38382000, 0x38384000, 0x38386000,
0x38388000, 0x3838a000, 0x3838c000, 0x3838e000, 0x38390000, 0x38392000,
0x38394000, 0x38396000, 0x38398000, 0x3839a000, 0x3839c000, 0x3839e000,
0x383a0000, 0x383a2000, 0x383a4000, 0x383a6000, 0x383a8000, 0x383aa000,
0x383ac000, 0x383ae000, 0x383b0000, 0x383b2000, 0x383b4000, 0x383b6000,
0x383b8000, 0x383ba000, 0x383bc000, 0x383be000, 0x383c0000, 0x383c2000,
0x383c4000, 0x383c6000, 0x383c8000, 0x383ca000, 0x383cc000, 0x383ce000,
0x383d0000, 0x383d2000, 0x383d4000, 0x383d6000, 0x383d8000, 0x383da000,
0x383dc000, 0x383de000, 0x383e0000, 0x383e2000, 0x383e4000, 0x383e6000,
0x383e8000, 0x383ea000, 0x383ec000, 0x383ee000, 0x383f0000, 0x383f2000,
0x383f4000, 0x383f6000, 0x383f8000, 0x383fa000, 0x383fc000, 0x383fe000,
0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840a000,
0x3840c000, 0x3840e000, 0x38410000, 0x38412000, 0x38414000, 0x38416000,
0x38418000, 0x3841a000, 0x3841c000, 0x3841e000, 0x38420000, 0x38422000,
0x38424000, 0x38426000, 0x38428000, 0x3842a000, 0x3842c000, 0x3842e000,
0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843a000,
0x3843c000, 0x3843e000, 0x38440000, 0x38442000, 0x38444000, 0x38446000,
0x38448000, 0x3844a000, 0x3844c000, 0x3844e000, 0x38450000, 0x38452000,
0x38454000, 0x38456000, 0x38458000, 0x3845a000, 0x3845c000, 0x3845e000,
0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846a000,
0x3846c000, 0x3846e000, 0x38470000, 0x38472000, 0x38474000, 0x38476000,
0x38478000, 0x3847a000, 0x3847c000, 0x3847e000, 0x38480000, 0x38482000,
0x38484000, 0x38486000, 0x38488000, 0x3848a000, 0x3848c000, 0x3848e000,
0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849a000,
0x3849c000, 0x3849e000, 0x384a0000, 0x384a2000, 0x384a4000, 0x384a6000,
0x384a8000, 0x384aa000, 0x384ac000, 0x384ae000, 0x384b0000, 0x384b2000,
0x384b4000, 0x384b6000, 0x384b8000, 0x384ba000, 0x384bc000, 0x384be000,
0x384c0000, 0x384c2000, 0x384c4000, 0x384c6000, 0x384c8000, 0x384ca000,
0x384cc000, 0x384ce000, 0x384d0000, 0x384d2000, 0x384d4000, 0x384d6000,
0x384d8000, 0x384da000, 0x384dc000, 0x384de000, 0x384e0000, 0x384e2000,
0x384e4000, 0x384e6000, 0x384e8000, 0x384ea000, 0x384ec000, 0x384ee000,
0x384f0000, 0x384f2000, 0x384f4000, 0x384f6000, 0x384f8000, 0x384fa000,
0x384fc000, 0x384fe000, 0x38500000, 0x38502000, 0x38504000, 0x38506000,
0x38508000, 0x3850a000, 0x3850c000, 0x3850e000, 0x38510000, 0x38512000,
0x38514000, 0x38516000, 0x38518000, 0x3851a000, 0x3851c000, 0x3851e000,
0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852a000,
0x3852c000, 0x3852e000, 0x38530000, 0x38532000, 0x38534000, 0x38536000,
0x38538000, 0x3853a000, 0x3853c000, 0x3853e000, 0x38540000, 0x38542000,
0x38544000, 0x38546000, 0x38548000, 0x3854a000, 0x3854c000, 0x3854e000,
0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855a000,
0x3855c000, 0x3855e000, 0x38560000, 0x38562000, 0x38564000, 0x38566000,
0x38568000, 0x3856a000, 0x3856c000, 0x3856e000, 0x38570000, 0x38572000,
0x38574000, 0x38576000, 0x38578000, 0x3857a000, 0x3857c000, 0x3857e000,
0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858a000,
0x3858c000, 0x3858e000, 0x38590000, 0x38592000, 0x38594000, 0x38596000,
0x38598000, 0x3859a000, 0x3859c000, 0x3859e000, 0x385a0000, 0x385a2000,
0x385a4000, 0x385a6000, 0x385a8000, 0x385aa000, 0x385ac000, 0x385ae000,
0x385b0000, 0x385b2000, 0x385b4000, 0x385b6000, 0x385b8000, 0x385ba000,
0x385bc000, 0x385be000, 0x385c0000, 0x385c2000, 0x385c4000, 0x385c6000,
0x385c8000, 0x385ca000, 0x385cc000, 0x385ce000, 0x385d0000, 0x385d2000,
0x385d4000, 0x385d6000, 0x385d8000, 0x385da000, 0x385dc000, 0x385de000,
0x385e0000, 0x385e2000, 0x385e4000, 0x385e6000, 0x385e8000, 0x385ea000,
0x385ec000, 0x385ee000, 0x385f0000, 0x385f2000, 0x385f4000, 0x385f6000,
0x385f8000, 0x385fa000, 0x385fc000, 0x385fe000, 0x38600000, 0x38602000,
0x38604000, 0x38606000, 0x38608000, 0x3860a000, 0x3860c000, 0x3860e000,
0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861a000,
0x3861c000, 0x3861e000, 0x38620000, 0x38622000, 0x38624000, 0x38626000,
0x38628000, 0x3862a000, 0x3862c000, 0x3862e000, 0x38630000, 0x38632000,
0x38634000, 0x38636000, 0x38638000, 0x3863a000, 0x3863c000, 0x3863e000,
0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864a000,
0x3864c000, 0x3864e000, 0x38650000, 0x38652000, 0x38654000, 0x38656000,
0x38658000, 0x3865a000, 0x3865c000, 0x3865e000, 0x38660000, 0x38662000,
0x38664000, 0x38666000, 0x38668000, 0x3866a000, 0x3866c000, 0x3866e000,
0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867a000,
0x3867c000, 0x3867e000, 0x38680000, 0x38682000, 0x38684000, 0x38686000,
0x38688000, 0x3868a000, 0x3868c000, 0x3868e000, 0x38690000, 0x38692000,
0x38694000, 0x38696000, 0x38698000, 0x3869a000, 0x3869c000, 0x3869e000,
0x386a0000, 0x386a2000, 0x386a4000, 0x386a6000, 0x386a8000, 0x386aa000,
0x386ac000, 0x386ae000, 0x386b0000, 0x386b2000, 0x386b4000, 0x386b6000,
0x386b8000, 0x386ba000, 0x386bc000, 0x386be000, 0x386c0000, 0x386c2000,
0x386c4000, 0x386c6000, 0x386c8000, 0x386ca000, 0x386cc000, 0x386ce000,
0x386d0000, 0x386d2000, 0x386d4000, 0x386d6000, 0x386d8000, 0x386da000,
0x386dc000, 0x386de000, 0x386e0000, 0x386e2000, 0x386e4000, 0x386e6000,
0x386e8000, 0x386ea000, 0x386ec000, 0x386ee000, 0x386f0000, 0x386f2000,
0x386f4000, 0x386f6000, 0x386f8000, 0x386fa000, 0x386fc000, 0x386fe000,
0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870a000,
0x3870c000, 0x3870e000, 0x38710000, 0x38712000, 0x38714000, 0x38716000,
0x38718000, 0x3871a000, 0x3871c000, 0x3871e000, 0x38720000, 0x38722000,
0x38724000, 0x38726000, 0x38728000, 0x3872a000, 0x3872c000, 0x3872e000,
0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873a000,
0x3873c000, 0x3873e000, 0x38740000, 0x38742000, 0x38744000, 0x38746000,
0x38748000, 0x3874a000, 0x3874c000, 0x3874e000, 0x38750000, 0x38752000,
0x38754000, 0x38756000, 0x38758000, 0x3875a000, 0x3875c000, 0x3875e000,
0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876a000,
0x3876c000, 0x3876e000, 0x38770000, 0x38772000, 0x38774000, 0x38776000,
0x38778000, 0x3877a000, 0x3877c000, 0x3877e000, 0x38780000, 0x38782000,
0x38784000, 0x38786000, 0x38788000, 0x3878a000, 0x3878c000, 0x3878e000,
0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879a000,
0x3879c000, 0x3879e000, 0x387a0000, 0x387a2000, 0x387a4000, 0x387a6000,
0x387a8000, 0x387aa000, 0x387ac000, 0x387ae000, 0x387b0000, 0x387b2000,
0x387b4000, 0x387b6000, 0x387b8000, 0x387ba000, 0x387bc000, 0x387be000,
0x387c0000, 0x387c2000, 0x387c4000, 0x387c6000, 0x387c8000, 0x387ca000,
0x387cc000, 0x387ce000, 0x387d0000, 0x387d2000, 0x387d4000, 0x387d6000,
0x387d8000, 0x387da000, 0x387dc000, 0x387de000, 0x387e0000, 0x387e2000,
0x387e4000, 0x387e6000, 0x387e8000, 0x387ea000, 0x387ec000, 0x387ee000,
0x387f0000, 0x387f2000, 0x387f4000, 0x387f6000, 0x387f8000, 0x387fa000,
0x387fc000, 0x387fe000};
static const uint16_t offsettable[64] = {
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0000, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400,
0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400, 0x0400};
static const uint32_t exponenttable[64] = {
0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000,
0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000,
0x06000000, 0x06800000, 0x07000000, 0x07800000, 0x08000000, 0x08800000,
0x09000000, 0x09800000, 0x0a000000, 0x0a800000, 0x0b000000, 0x0b800000,
0x0c000000, 0x0c800000, 0x0d000000, 0x0d800000, 0x0e000000, 0x0e800000,
0x0f000000, 0x47800000, 0x80000000, 0x80800000, 0x81000000, 0x81800000,
0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000,
0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8a000000, 0x8a800000,
0x8b000000, 0x8b800000, 0x8c000000, 0x8c800000, 0x8d000000, 0x8d800000,
0x8e000000, 0x8e800000, 0x8f000000, 0xc7800000};
static const uint16_t basetable[512] = {
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010,
0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x0c00, 0x1000,
0x1400, 0x1800, 0x1c00, 0x2000, 0x2400, 0x2800, 0x2c00, 0x3000, 0x3400,
0x3800, 0x3c00, 0x4000, 0x4400, 0x4800, 0x4c00, 0x5000, 0x5400, 0x5800,
0x5c00, 0x6000, 0x6400, 0x6800, 0x6c00, 0x7000, 0x7400, 0x7800, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x7c00,
0x7c00, 0x7c00, 0x7c00, 0x7c00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001,
0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200,
0x8400, 0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00, 0xa000, 0xa400,
0xa800, 0xac00, 0xb000, 0xb400, 0xb800, 0xbc00, 0xc000, 0xc400, 0xc800,
0xcc00, 0xd000, 0xd400, 0xd800, 0xdc00, 0xe000, 0xe400, 0xe800, 0xec00,
0xf000, 0xf400, 0xf800, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00,
0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00, 0xfc00};
static const uint8_t shifttable[512] = {
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17, 0x16, 0x15, 0x14, 0x13,
0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x17,
0x16, 0x15, 0x14, 0x13, 0x12, 0x11, 0x10, 0x0f, 0x0e, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d, 0x0d,
0x0d, 0x0d, 0x0d, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18,
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x0d};
half_t Float2Half(float f) {
uint32_t v = *reinterpret_cast<uint32_t *>(&f);
return basetable[(v >> 23) & 0x1ff] +
((v & 0x007fffff) >> shifttable[(v >> 23) & 0x1ff]);
}
float Half2Float(half_t h) {
uint32_t v = mantissatable[offsettable[h >> 10] + (h & 0x3ff)] +
exponenttable[h >> 10];
return *reinterpret_cast<float *>(&v);
}
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count) {
for (int i = 0; i < count; ++i) {
h_array[i] = Float2Half(f_array[i]);
}
}
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count) {
for (int i = 0; i < count; ++i) {
f_array[i] = Half2Float(h_array[i]);
}
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace paddle {
namespace lite {
typedef uint16_t half_t;
half_t Float2Half(float f);
float Half2Float(half_t h);
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
void CLHelper::set_context(CLContext *context) { context_ = context; }
void CLHelper::AddKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options) {
CHECK(context_ != nullptr) << "Please use set_context first!";
VLOG(3) << " --- begin to add kernel ---";
auto kernel = context_->GetKernel(kernel_name, file_name, options);
kernels.emplace_back(std::move(kernel));
VLOG(3) << " --- end to add kernel --- ";
}
cl::Kernel &CLHelper::KernelAt(const int index) {
VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
CHECK(static_cast<size_t>(index) < kernels.size())
<< "The index must be less than the size of kernels.";
CHECK(kernels[index] != nullptr)
<< "The target kernel pointer cannot be null.";
return *(kernels[index]);
}
cl::CommandQueue &CLHelper::OpenCLCommandQueue() {
CHECK(context_ != nullptr) << "Please use set_context first!";
return context_->GetCommandQueue();
}
cl::Context &CLHelper::OpenCLContext() {
CHECK(context_ != nullptr) << "Please use set_context first!";
return context_->GetContext();
}
cl::NDRange CLHelper::DefaultWorkSize(const CLImage &image) {
// n c h w
auto image_dim = image.tensor_dims();
if (image_dim.size() == 4) {
auto n = image_dim[0];
auto h = image_dim[2];
auto w = image_dim[3];
auto image_width = image.ImageWidth();
auto work_size_0 = image_width / w;
auto work_size_1 = w;
auto work_size_2 = n * h;
return cl::NDRange{static_cast<size_t>(work_size_0),
static_cast<size_t>(work_size_1),
static_cast<size_t>(work_size_2)};
} else if (image_dim.size() == 2) {
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(image.ImageHeight())};
} else if (image_dim.size() == 1) {
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(1)};
} else if (image_dim.size() == 3) {
auto c = image_dim[0];
auto h = image_dim[1];
auto w = image_dim[2];
return cl::NDRange{static_cast<size_t>((c + 3) / 4), static_cast<size_t>(w),
static_cast<size_t>(h)};
} else {
LOG(FATAL) << "Not support this dimension, need to be implemented!";
return cl::NDRange{};
}
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
namespace paddle {
namespace lite {
class CLHelper {
public:
CLHelper() = default;
explicit CLHelper(CLContext *context) : context_(context) {}
void set_context(CLContext *context);
void AddKernel(const std::string &kernel_name, const std::string &file_name,
const std::string &options = "");
cl::Kernel &KernelAt(const int index);
cl::CommandQueue &OpenCLCommandQueue();
cl::Context &OpenCLContext();
cl::NDRange DefaultWorkSize(const CLImage &image);
private:
CLContext *context_{nullptr};
std::vector<std::unique_ptr<cl::Kernel>> kernels;
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int width = cl_image.image_dims_[0];
int height = cl_image.image_dims_[1];
half_t* image_data = new half_t[height * width * 4];
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
float* tensor_data = new float[cl_image.numel()];
auto* converter = cl_image.image_converter();
converter->ImageToNCHW(image_data, tensor_data, cl_image.image_dims_,
cl_image.tensor_dims_);
int stride = cl_image.numel() / 20;
stride = stride > 0 ? stride : 1;
os << " dims: " << cl_image.tensor_dims_ << "\n";
for (int i = 0; i < cl_image.numel(); i += stride) {
os << tensor_data[i] << " ";
}
delete[] tensor_data;
delete[] image_data;
return os;
}
void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
auto numel = dim.product();
#else
auto numel = dim.production();
#endif
tensor_data_.reset(new float[numel]);
memcpy(tensor_data_.get(), tensor_data, numel * sizeof(float));
tensor_dims_ = dim;
}
void CLImage::InitCLImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call "
"set_tensohelper->DefaultWorkSize(out_"
"image)r_data first!";
image_converter_.reset(new CLImageConverterFolder);
InitCLImage(context, image_converter_.get());
}
void CLImage::InitNormalCLImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
image_converter_.reset(new CLImageConverterNormal);
InitCLImage(context, image_converter_.get());
}
void CLImage::InitNImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
image_converter_.reset(new CLImageConverterNWBlock);
InitCLImage(context, image_converter_.get());
}
void CLImage::InitDWImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
image_converter_.reset(new CLImageConverterDWBlock);
InitCLImage(context, image_converter_.get());
}
void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
CHECK(tensor_data_ == nullptr)
<< " Empty image tensor data shouldn't have value";
tensor_dims_ = dim;
image_converter_.reset(new CLImageConverterNormal);
VLOG(3) << " to get image dims ";
image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
VLOG(3) << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image ";
}
void CLImage::InitEmptyWithImageDim(const cl::Context& context,
const DDim& image_dims) {
VLOG(3) << " to get image dims ";
image_dims_ = image_dims;
VLOG(3) << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image";
}
void CLImage::InitCLImage(const cl::Context& context,
CLImageConverterBase* converter) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
VLOG(3) << " begin init cl image ";
image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
half_t* image_data = new half_t[image_dims_.product() * 4];
#else
half_t* image_data = new half_t[image_dims_.production() * 4];
#endif
VLOG(3) << " convert to image ";
converter->NCHWToImage(tensor_data_.get(), image_data, tensor_dims_);
VLOG(3) << " end convert to image ";
InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
delete[] image_data;
tensor_data_ = nullptr;
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image ";
}
void CLImage::InitCLImage(const cl::Context& context, int width, int height,
void* data) {
cl::ImageFormat img_format(CL_RGBA, CL_HALF_FLOAT);
cl_int err;
cl_image_.reset(new cl::Image2D(
context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
img_format, width, height, 0, data, &err));
CL_CHECK_ERRORS(err);
CHECK(err == CL_SUCCESS) << " Create image 2d error.";
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
namespace paddle {
namespace lite {
class CLImage {
// For debug
friend std::ostream& operator<<(std::ostream& os, const CLImage& image);
public:
CLImage() = default;
/*
* Will not hold input tensor data, memcpy in this method.
* */
void set_tensor_data(float* tensor_data, const DDim& dim);
bool IsInit() { return initialized_; }
/*
* Need call set_tensor_data first.
* Folder when one dim or two dim.
* */
void InitCLImage(const cl::Context& context);
void InitNormalCLImage(const cl::Context& context);
void InitNImage(const cl::Context& context);
void InitDWImage(const cl::Context& context);
void InitEmptyImage(const cl::Context& context, const DDim& dim);
void InitEmptyWithImageDim(const cl::Context& context,
const DDim& image_dims);
cl::Image* cl_image() const { return cl_image_.get(); }
const DDim& image_dims() const { return image_dims_; }
inline size_t ImageWidth() const { return image_dims_[0]; }
inline size_t ImageHeight() const { return image_dims_[1]; }
const DDim& tensor_dims() const { return tensor_dims_; }
/*with_da
* Resize original tensor dim.
* */
inline CLImage& Resize(const DDim& dims) {
tensor_dims_ = dims;
return *this;
}
template <typename T>
T* data() const {
CHECK(!initialized_) << "CL image has initialized, tensor data has been "
"deleted, can't use tensor data!";
return reinterpret_cast<T*>(tensor_data_);
}
/*
* Numel of tensor dim
* */
inline int64_t numel() const {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
return tensor_dims_.product();
#else
return tensor_dims_.production();
#endif
}
/*
* Original tensor dim
* */
cl::UserEvent& cl_event() const { return *cl_event_; }
CLImageConverterBase* image_converter() const {
return image_converter_.get();
}
private:
void InitCLImage(const cl::Context& context, CLImageConverterBase* converter);
void InitCLImage(const cl::Context& context, int width, int height,
void* data);
bool initialized_ = false;
std::unique_ptr<cl::Image2D> cl_image_{nullptr};
std::unique_ptr<cl::UserEvent> cl_event_{nullptr};
DDim tensor_dims_;
DDim image_dims_;
std::unique_ptr<float> tensor_data_{nullptr};
std::unique_ptr<CLImageConverterBase> image_converter_{nullptr};
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>
namespace paddle {
namespace lite {
DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
VLOG(3) << " tensor dim: " << tensor_dim;
VLOG(3) << " image dim: " << in_image_dim;
size_t width = in_image_dim[0];
size_t w_block = width / W;
float *p = nchw;
size_t i0 = 0;
for (size_t n = 0; n < N; n++) {
for (size_t c = 0; c < w_block * 4; c++) {
size_t i1 = i0 + (c / 4) * W;
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
if (c < C) {
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image[i2] = Float2Half(*p);
i2 += 4;
p++;
} else {
image[i2] = 0.0;
i2 += 4;
}
}
i1 += width;
}
}
i0 += width * H;
}
}
void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = image_dim[0];
float *p = tensor;
size_t i0 = 0;
for (size_t n = 0; n < N; n++) {
for (size_t c = 0; c < C; c++) {
size_t i1 = i0 + (c / 4) * W;
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
*p = Half2Float(image[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
}
DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim &tensor_dim) {
if (tensor_dim.size() <= 2) {
size_t tdim[2] = {1, 1};
if (tensor_dim.size() == 1) {
tdim[1] = tensor_dim[0];
} else {
tdim[0] = tensor_dim[0];
tdim[1] = tensor_dim[1];
}
size_t width = (tdim[1] + 3) / 4;
size_t height = tdim[0];
width_of_one_block_ = width;
height_of_one_block_ = height;
c_block_ = 1;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
} else {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
width_of_one_block_ = W;
height_of_one_block_ = H;
c_block_ = width / W;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
}
void CLImageConverterFolder::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
<< " Tensor dim is not support!";
if (tensor_dim.size() > 2) {
CLImageConverterDefault default_converter;
default_converter.NCHWToImage(tensor, image, tensor_dim);
} else {
size_t tdim[2] = {1, 1};
if (tensor_dim.size() == 1) {
tdim[1] = tensor_dim[0];
} else {
tdim[0] = tensor_dim[0];
tdim[1] = tensor_dim[1];
}
DDim image_dim = InitImageDimInfoWith(tensor_dim);
size_t width = image_dim[0];
for (size_t h = 0; h < tdim[0]; h++) {
for (size_t w = 0; w < tdim[1]; w++) {
image[(h * width + w / 4) * 4 + (w % 4)] =
Float2Half(tensor[h * tdim[1] + w]);
}
}
}
}
void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
if (tensor_dim.size() > 2) {
CLImageConverterDefault default_converter;
default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
} else {
size_t width = image_dim[0];
size_t H = 1, W = 1;
if (tensor_dim.size() == 2) {
H = tensor_dim[0];
W = tensor_dim[1];
} else if (tensor_dim.size() == 1) {
W = tensor_dim[0];
}
float *p = tensor;
for (size_t h = 0; h < H; h++) {
for (size_t w = 0; w < W; w++) {
p[h * W + w] = Half2Float(image[(h * width + w / 4) * 4 + (w % 4)]);
}
}
}
}
DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
size_t N, C, H, W;
N = tensor_dim[0];
C = tensor_dim[1];
H = tensor_dim[2];
W = tensor_dim[3];
size_t width = W * ((N + 3) / 4);
size_t height = C * H;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
auto image_dim = InitImageDimInfoWith(tensor_dim);
float *p = tensor;
size_t N = tensor_dim[0];
size_t C = tensor_dim[1];
size_t H = tensor_dim[2];
size_t W = tensor_dim[3];
size_t width = image_dim[0];
size_t height = image_dim[1];
size_t block = image_dim[0] / tensor_dim[3];
for (size_t n = 0; n < block * 4; n++) {
for (size_t c = 0; c < C; c++) {
for (size_t h = 0; h < H; ++h) {
for (size_t w = 0; w < W; ++w) {
size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
if (n < N) {
image[index] = Float2Half(*p);
p++;
} else {
image[index] = 0.0;
}
if (index >= (width * height * 4)) {
LOG(INFO) << " index out of range ";
}
}
}
}
}
VLOG(3) << " init done";
}
void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
float *p = tensor;
size_t N = tensor_dim[0];
size_t C = tensor_dim[1];
size_t H = tensor_dim[2];
size_t W = tensor_dim[3];
size_t width = image_dim[0];
size_t height = image_dim[1];
for (size_t n = 0; n < N; n++) {
for (size_t c = 0; c < C; c++) {
for (size_t h = 0; h < H; ++h) {
for (size_t w = 0; w < W; ++w) {
size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
w * 4 + n % 4;
*p = Half2Float(image[index]);
p++;
if (index >= (width * height * 4)) {
LOG(INFO) << " index out of range ";
}
}
}
}
}
VLOG(3) << " init done";
}
DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
size_t N, C, H, W;
N = tensor_dim[0];
C = tensor_dim[1];
H = tensor_dim[2];
W = tensor_dim[3];
size_t width = W * ((N + 3) / 4);
size_t height = C * H;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[1];
C = new_dims[0];
H = new_dims[2];
W = new_dims[3];
DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
VLOG(3) << " tensor dim: " << tensor_dim;
VLOG(3) << " image dim: " << in_image_dim;
size_t width = in_image_dim[0];
size_t w_block = width / W;
float *p = tensor;
size_t i0 = 0;
for (size_t n = 0; n < N; n++) {
for (size_t c = 0; c < w_block * 4; c++) {
size_t i1 = i0 + (c / 4) * W;
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
if (c < C) {
// size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
// (c % 4);
image[i2] = Float2Half(*p);
i2 += 4;
p++;
} else {
image[i2] = 0.0;
i2 += 4;
}
}
i1 += width;
}
}
i0 += width * H;
}
}
void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
float *p = tensor;
size_t N = tensor_dim[1];
size_t C = tensor_dim[0];
size_t H = tensor_dim[2];
size_t W = tensor_dim[3];
size_t width = image_dim[0];
size_t i0 = 0;
for (size_t n = 0; n < N; n++) {
for (size_t c = 0; c < C; c++) {
size_t i1 = i0 + (c / 4) * W;
for (size_t h = 0; h < H; h++) {
size_t i2 = (i1 << 2) + c % 4;
for (size_t w = 0; w < W; w++) {
*p = Half2Float(image[i2]);
i2 += 4;
p++;
}
i1 += width;
}
}
i0 += width * H;
}
}
DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim &tensor_dim) {
size_t new_dims[] = {1, 1, 1, 1};
for (size_t j = 0; j < tensor_dim.size(); ++j) {
new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
}
size_t N, C, H, W;
N = new_dims[0];
C = new_dims[1];
H = new_dims[2];
W = new_dims[3];
size_t width = W * ((C + 3) / 4);
size_t height = H * N;
width_of_one_block_ = W;
height_of_one_block_ = H;
c_block_ = width / W;
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
void CLImageConverterNormal::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
<< " Tensor dim is not support!";
CLImageConverterDefault default_converter;
default_converter.NCHWToImage(tensor, image, tensor_dim);
}
void CLImageConverterNormal::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {
CLImageConverterDefault default_converter;
default_converter.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}
DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
const DDim &tensor_dim) {
CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
size_t N, C;
N = tensor_dim[0];
C = tensor_dim[1];
size_t width = (C + 3) / 4;
size_t height = N * 16; // N * (wino_blk_size + 2) * (wino_blk_size + 2)
return DDim(
std::vector<DDim::value_type>({static_cast<DDim::value_type>(width),
static_cast<DDim::value_type>(height)}));
}
void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {}
void CLImageConverterWinoTransWeight::ImageToNCHW(half_t *image, float *tensor,
const DDim &image_dim,
const DDim &tensor_dim) {}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace paddle {
namespace lite {
class CLImageConverterBase {
public:
virtual ~CLImageConverterBase() {}
virtual void NCHWToImage(float *nchw, half_t *image,
const DDim &tensor_dim) = 0;
virtual void ImageToNCHW(half_t *image, float *nchw, const DDim &image_dim,
const DDim &tensor_dim) = 0;
virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;
};
class CLImageConverterDefault : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *nchw, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterFolder : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
/*
* width of original tensor
* */
inline size_t WidthOfOneBlock() const { return width_of_one_block_; }
/*
* height of original tensor
* */
inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
int GetCBlock() const { return c_block_; }
private:
int c_block_;
int width_of_one_block_;
int height_of_one_block_;
};
class CLImageConverterNormal : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
/*
* width of original tensor
* */
inline size_t WidthOfOneBlock() const { return width_of_one_block_; }
/*
* height of original tensor
* */
inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
int GetCBlock() const { return c_block_; }
private:
int c_block_;
int width_of_one_block_;
int height_of_one_block_;
};
class CLImageConverterNWBlock : public CLImageConverterBase {
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterDWBlock : public CLImageConverterBase {
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterWinoTransWeight : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
inline half4 activation(half4 in
#ifdef PRELU
,
half4 prelu_alpha
#endif
) {
half4 output;
#ifdef PRELU
output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif
#ifdef RELU
output = fmax(in, (half4)(0.0f));
#endif
return output;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords);
half4 output = in + biase;
write_imageh(outputImage,coords,output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define MIN_VALUE -FLT_MAX
__kernel void pool_max(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = out_h * stride_h - pad_top;
int end_h = min(start_h + ksize_h, in_height);
start_h = max(start_h,0);
int start_w = out_w * stride_w - pad_left;
int end_w = min(start_w + ksize_w, in_width);
start_w = max(start_w,0);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 max_value = (half4)(MIN_VALUE);
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
max_value = max(max_value, tmp);
}
}
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}
__kernel void pool_avg(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = max(out_h * stride_h - pad_top, 0);
int end_h = min(start_h + ksize_h, in_height);
int start_w = max(out_w * stride_w - pad_left, 0);
int end_w = min(start_w + ksize_w, in_width);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 sum = (half4)(0.0f);
int num = 0;
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
num++;
}
}
half4 avg = sum / num;
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace paddle {
namespace lite {
TEST(cl_test, engine_test) {
auto* engine = CLEngine::Global();
CHECK(engine->IsInitSuccess());
engine->set_cl_path(FLAGS_cl_path);
engine->platform();
engine->device();
engine->command_queue();
auto& context = engine->context();
auto program = engine->CreateProgram(
context, engine->cl_path() + "/cl_kernel/" + "elementwise_add_kernel.cl");
auto event = engine->CreateEvent(context);
CHECK(engine->BuildProgram(program.get()));
}
TEST(cl_test, context_test) {
auto* engine = CLEngine::Global();
CHECK(engine->IsInitSuccess());
engine->set_cl_path(FLAGS_cl_path);
CLContext context;
context.GetKernel("pool_max", "pool_kernel.cl", "");
context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
context.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
}
TEST(cl_test, kernel_test) {
auto* engine = CLEngine::Global();
CHECK(engine->IsInitSuccess());
engine->set_cl_path(FLAGS_cl_path);
std::unique_ptr<CLContext> context(new CLContext);
// std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
std::unique_ptr<CLHelper> helper(new CLHelper);
helper->set_context(context.get());
helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
helper->AddKernel("pool_max", "pool_kernel.cl");
helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
auto kernel = helper->KernelAt(2);
std::unique_ptr<float[]> in_data(new float[1024 * 512]);
for (int i = 0; i < 1024 * 512; i++) {
in_data[i] = 1.f;
}
const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
CLImage in_image;
in_image.set_tensor_data(in_data.get(), in_dim);
in_image.InitNormalCLImage(helper->OpenCLContext());
LOG(INFO) << in_image;
std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
for (int i = 0; i < 1024 * 512; i++) {
bias_data[i] = 2.f;
}
const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
CLImage bias_image;
bias_image.set_tensor_data(bias_data.get(), bias_dim);
bias_image.InitNormalCLImage(helper->OpenCLContext());
LOG(INFO) << bias_image;
CLImage out_image;
const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
LOG(INFO) << out_image;
cl_int status;
status = kernel.setArg(0, *in_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(1, *bias_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(2, *out_image.cl_image());
CL_CHECK_ERRORS(status);
// auto global_work_size = helper->DefaultWorkSize(out_image);
size_t width = in_image.ImageWidth();
size_t height = in_image.ImageHeight();
auto global_work_size = cl::NDRange{width, height};
cl::Event event;
status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event);
CL_CHECK_ERRORS(status);
double start_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
double stop_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
double elapsed_micros = (stop_nanos - start_nanos) / 1000.0;
LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us.";
LOG(INFO) << out_image;
}
TEST(cl_test, elementwise_add_test) {
std::default_random_engine engine;
std::uniform_real_distribution<float> dist(-5, 5);
const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
std::unique_ptr<float[]> in_data(new float[1024 * 512]);
for (int i = 0; i < 1024 * 512; i++) {
in_data[i] = dist(engine);
}
const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
for (int i = 0; i < 1024 * 512; i++) {
bias_data[i] = dist(engine);
}
const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
std::unique_ptr<float[]> out(new float[1024 * 512]);
bool status = InitOpenCLEngine(FLAGS_cl_path);
CHECK(status) << "Fail to initialize OpenCL engine.";
CLContext context;
elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
out.get(), out_dim);
int stride = 1024 * 512 / 20;
for (int i = 0; i < 1024 * 512; i += stride) {
std::cout << out[i] << " ";
}
std::cout << std::endl;
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) \
case NAME: \
return #NAME;
// Suppose that no combinations are possible.
switch (error) {
CASE_CL_CONSTANT(CL_SUCCESS)
CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
CASE_CL_CONSTANT(CL_MAP_FAILURE)
CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_VALUE)
CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
CASE_CL_CONSTANT(CL_INVALID_DEVICE)
CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
CASE_CL_CONSTANT(CL_INVALID_BINARY)
CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
CASE_CL_CONSTANT(CL_INVALID_KERNEL)
CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
CASE_CL_CONSTANT(CL_INVALID_EVENT)
CASE_CL_CONSTANT(CL_INVALID_OPERATION)
CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
default:
return "UNKNOWN ERROR CODE";
}
#undef CASE_CL_CONSTANT
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {
const char* opencl_error_to_str(cl_int error);
#define CL_CHECK_ERRORS(ERR) \
if (ERR != CL_SUCCESS) { \
printf( \
"OpenCL error with code %s happened in file %s at line %d. " \
"Exiting.\n", \
opencl_error_to_str(ERR), __FILE__, __LINE__); \
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <dlfcn.h>
#include <glog/logging.h>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
/**
* Wrapper of OpenCL 2.0, based on file opencl20/CL/cl.h
*/
#if CL_HPP_TARGET_OPENCL_VERSION < 200
#define CL_API_SUFFIX__VERSION_2_0
#endif
namespace paddle {
namespace lite {
class OpenCLLibrary final {
private:
OpenCLLibrary();
OpenCLLibrary(const OpenCLLibrary &) = delete;
OpenCLLibrary &operator=(const OpenCLLibrary &) = delete;
bool Load();
void *LoadFromPath(const std::string &path);
public:
static OpenCLLibrary *Get();
using clGetPlatformIDsFunc = cl_int (*)(cl_uint, cl_platform_id *, cl_uint *);
using clGetPlatformInfoFunc = cl_int (*)(cl_platform_id, cl_platform_info,
size_t, void *, size_t *);
using clBuildProgramFunc = cl_int (*)(cl_program, cl_uint,
const cl_device_id *, const char *,
void (*pfn_notify)(cl_program, void *),
void *);
using clEnqueueNDRangeKernelFunc = cl_int (*)(cl_command_queue, cl_kernel,
cl_uint, const size_t *,
const size_t *, const size_t *,
cl_uint, const cl_event *,
cl_event *);
using clSetKernelArgFunc = cl_int (*)(cl_kernel, cl_uint, size_t,
const void *);
using clRetainMemObjectFunc = cl_int (*)(cl_mem);
using clReleaseMemObjectFunc = cl_int (*)(cl_mem);
using clEnqueueUnmapMemObjectFunc = cl_int (*)(cl_command_queue, cl_mem,
void *, cl_uint,
const cl_event *, cl_event *);
using clRetainCommandQueueFunc = cl_int (*)(cl_command_queue command_queue);
using clCreateContextFunc = cl_context (*)(
const cl_context_properties *, cl_uint, const cl_device_id *,
void(CL_CALLBACK *)( // NOLINT(readability/casting)
const char *, const void *, size_t, void *),
void *, cl_int *);
using clCreateContextFromTypeFunc =
cl_context (*)(const cl_context_properties *, cl_device_type,
void(CL_CALLBACK *)( // NOLINT(readability/casting)
const char *, const void *, size_t, void *),
void *, cl_int *);
using clReleaseContextFunc = cl_int (*)(cl_context);
using clWaitForEventsFunc = cl_int (*)(cl_uint, const cl_event *);
using clReleaseEventFunc = cl_int (*)(cl_event);
using clEnqueueWriteBufferFunc = cl_int (*)(cl_command_queue, cl_mem, cl_bool,
size_t, size_t, const void *,
cl_uint, const cl_event *,
cl_event *);
using clEnqueueReadBufferFunc = cl_int (*)(cl_command_queue, cl_mem, cl_bool,
size_t, size_t, void *, cl_uint,
const cl_event *, cl_event *);
using clEnqueueReadImageFunc = cl_int (*)(cl_command_queue, cl_mem, cl_bool,
const size_t *, const size_t *,
size_t, size_t, void *, cl_uint,
const cl_event *, cl_event *);
using clGetProgramBuildInfoFunc = cl_int (*)(cl_program, cl_device_id,
cl_program_build_info, size_t,
void *, size_t *);
using clRetainProgramFunc = cl_int (*)(cl_program program);
using clEnqueueMapBufferFunc = void *(*)(cl_command_queue, cl_mem, cl_bool,
cl_map_flags, size_t, size_t,
cl_uint, const cl_event *,
cl_event *, cl_int *);
using clEnqueueMapImageFunc = void *(*)(cl_command_queue, cl_mem, cl_bool,
cl_map_flags, const size_t *,
const size_t *, size_t *, size_t *,
cl_uint, const cl_event *, cl_event *,
cl_int *);
using clCreateCommandQueueFunc = cl_command_queue(CL_API_CALL *)( // NOLINT
cl_context, cl_device_id, cl_command_queue_properties, cl_int *);
using clCreateCommandQueueWithPropertiesFunc = cl_command_queue (*)(
cl_context, cl_device_id, const cl_queue_properties *, cl_int *);
using clReleaseCommandQueueFunc = cl_int (*)(cl_command_queue);
using clCreateProgramWithBinaryFunc = cl_program (*)(cl_context, cl_uint,
const cl_device_id *,
const size_t *,
const unsigned char **,
cl_int *, cl_int *);
using clRetainContextFunc = cl_int (*)(cl_context context);
using clGetContextInfoFunc = cl_int (*)(cl_context, cl_context_info, size_t,
void *, size_t *);
using clReleaseProgramFunc = cl_int (*)(cl_program program);
using clFlushFunc = cl_int (*)(cl_command_queue command_queue);
using clFinishFunc = cl_int (*)(cl_command_queue command_queue);
using clGetProgramInfoFunc = cl_int (*)(cl_program, cl_program_info, size_t,
void *, size_t *);
using clCreateKernelFunc = cl_kernel (*)(cl_program, const char *, cl_int *);
using clRetainKernelFunc = cl_int (*)(cl_kernel kernel);
using clCreateBufferFunc = cl_mem (*)(cl_context, cl_mem_flags, size_t,
void *, cl_int *);
using clCreateImage2DFunc = cl_mem(CL_API_CALL *)(cl_context, // NOLINT
cl_mem_flags,
const cl_image_format *,
size_t, size_t, size_t,
void *, cl_int *);
using clCreateImageFunc = cl_mem (*)(cl_context, cl_mem_flags,
const cl_image_format *,
const cl_image_desc *, void *, cl_int *);
using clCreateUserEventFunc = cl_event (*)(cl_context, cl_int *);
using clCreateProgramWithSourceFunc = cl_program (*)(cl_context, cl_uint,
const char **,
const size_t *,
cl_int *);
using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel);
using clGetDeviceInfoFunc = cl_int (*)(cl_device_id, cl_device_info, size_t,
void *, size_t *);
using clGetDeviceIDsFunc = cl_int (*)(cl_platform_id, cl_device_type, cl_uint,
cl_device_id *, cl_uint *);
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clRetainEventFunc = cl_int (*)(cl_event);
using clGetKernelWorkGroupInfoFunc = cl_int (*)(cl_kernel, cl_device_id,
cl_kernel_work_group_info,
size_t, void *, size_t *);
using clGetEventInfoFunc = cl_int (*)(cl_event event,
cl_event_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
using clGetEventProfilingInfoFunc = cl_int (*)(cl_event event,
cl_profiling_info param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
using clGetImageInfoFunc = cl_int (*)(cl_mem, cl_image_info, size_t, void *,
size_t *);
#define PADDLE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
PADDLE_CL_DEFINE_FUNC_PTR(clGetPlatformIDs);
PADDLE_CL_DEFINE_FUNC_PTR(clGetPlatformInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clBuildProgram);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueNDRangeKernel);
PADDLE_CL_DEFINE_FUNC_PTR(clSetKernelArg);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseKernel);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateProgramWithSource);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateBuffer);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateImage);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateImage2D);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateUserEvent);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainKernel);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateKernel);
PADDLE_CL_DEFINE_FUNC_PTR(clGetProgramInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clFlush);
PADDLE_CL_DEFINE_FUNC_PTR(clFinish);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseProgram);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainContext);
PADDLE_CL_DEFINE_FUNC_PTR(clGetContextInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateProgramWithBinary);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateCommandQueue);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseCommandQueue);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueMapBuffer);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueMapImage);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainProgram);
PADDLE_CL_DEFINE_FUNC_PTR(clGetProgramBuildInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueReadBuffer);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueReadImage);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueWriteBuffer);
PADDLE_CL_DEFINE_FUNC_PTR(clWaitForEvents);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseEvent);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateContext);
PADDLE_CL_DEFINE_FUNC_PTR(clCreateContextFromType);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseContext);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainCommandQueue);
PADDLE_CL_DEFINE_FUNC_PTR(clEnqueueUnmapMemObject);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainMemObject);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseMemObject);
PADDLE_CL_DEFINE_FUNC_PTR(clGetDeviceInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clGetDeviceIDs);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainDevice);
PADDLE_CL_DEFINE_FUNC_PTR(clReleaseDevice);
PADDLE_CL_DEFINE_FUNC_PTR(clRetainEvent);
PADDLE_CL_DEFINE_FUNC_PTR(clGetKernelWorkGroupInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clGetEventInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
PADDLE_CL_DEFINE_FUNC_PTR(clGetImageInfo);
#undef PADDLE_CL_DEFINE_FUNC_PTR
private:
void *handle_ = nullptr;
};
OpenCLLibrary *OpenCLLibrary::Get() {
static OpenCLLibrary library;
return &library;
}
OpenCLLibrary::OpenCLLibrary() {
this->Load();
// Do not call dlclose which may unload all OpenCL symbols.
// If close the OpenCL library, the static OpenCLlite destructor may fail.
// If there is no dlclose, the library will be closed when the program exist.
// Besides, the library will not be load repeatedly even dlopen many times.
}
bool OpenCLLibrary::Load() {
if (handle_ != nullptr) {
return true;
}
// Add customized OpenCL search path here
const std::vector<std::string> paths = {
"libOpenCL.so",
#if defined(__aarch64__)
// Qualcomm Adreno with Android
"/system/vendor/lib64/libOpenCL.so",
"/system/lib64/libOpenCL.so",
// Mali with Android
"/system/vendor/lib64/egl/libGLES_mali.so",
"/system/lib64/egl/libGLES_mali.so",
// Typical Linux board
"/usr/lib/aarch64-linux-gnu/libOpenCL.so",
#else
// Qualcomm Adreno with Android
"/system/vendor/lib/libOpenCL.so",
"/system/lib/libOpenCL.so",
// Mali with Android
"/system/vendor/lib/egl/libGLES_mali.so",
"/system/lib/egl/libGLES_mali.so",
// Typical Linux board
"/usr/lib/arm-linux-gnueabihf/libOpenCL.so",
#endif
};
for (const auto &path : paths) {
VLOG(3) << "Loading OpenCL from " << path;
void *handle = LoadFromPath(path);
if (handle != nullptr) {
handle_ = handle;
break;
}
}
if (handle_ == nullptr) {
LOG(ERROR)
<< "Failed to load OpenCL library, "
"please make sure there exists OpenCL library on your device, "
"and your APP have right to access the library.";
return false;
}
return true;
}
void *OpenCLLibrary::LoadFromPath(const std::string &path) {
void *handle = dlopen(path.c_str(), RTLD_LAZY | RTLD_LOCAL);
if (handle == nullptr) {
VLOG(3) << "Failed to load OpenCL library from path " << path
<< " error code: " << dlerror();
return nullptr;
}
#define PADDLE_CL_ASSIGN_FROM_DLSYM(func) \
do { \
void *ptr = dlsym(handle, #func); \
if (ptr == nullptr) { \
VLOG(1) << "Failed to load " << #func << " from " << path; \
continue; \
} \
func = reinterpret_cast<func##Func>(ptr); \
VLOG(3) << "Loaded " << #func << " from " << path; \
} while (false)
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetPlatformIDs);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetPlatformInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clBuildProgram);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel);
PADDLE_CL_ASSIGN_FROM_DLSYM(clSetKernelArg);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseKernel);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithSource);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateBuffer);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateImage);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateImage2D);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateUserEvent);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainKernel);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateKernel);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetProgramInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clFlush);
PADDLE_CL_ASSIGN_FROM_DLSYM(clFinish);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseProgram);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainContext);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetContextInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateProgramWithBinary);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateCommandQueue);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseCommandQueue);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapBuffer);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueMapImage);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainProgram);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetProgramBuildInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueReadBuffer);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueReadImage);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer);
PADDLE_CL_ASSIGN_FROM_DLSYM(clWaitForEvents);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseEvent);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateContext);
PADDLE_CL_ASSIGN_FROM_DLSYM(clCreateContextFromType);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseContext);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainCommandQueue);
PADDLE_CL_ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainMemObject);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseMemObject);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetDeviceInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetDeviceIDs);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainDevice);
PADDLE_CL_ASSIGN_FROM_DLSYM(clReleaseDevice);
PADDLE_CL_ASSIGN_FROM_DLSYM(clRetainEvent);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetKernelWorkGroupInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetEventInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo);
PADDLE_CL_ASSIGN_FROM_DLSYM(clGetImageInfo);
#undef PADDLE_CL_ASSIGN_FROM_DLSYM
return handle;
}
} // namespace lite
} // namespace paddle
CL_API_ENTRY cl_event clCreateUserEvent(cl_context context, cl_int *errcode_ret)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateUserEvent;
if (func != nullptr) {
return func(context, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
// Platform APIs
CL_API_ENTRY cl_int
clGetPlatformIDs(cl_uint num_entries, cl_platform_id *platforms,
cl_uint *num_platforms) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetPlatformIDs;
if (func != nullptr) {
return func(num_entries, platforms, num_platforms);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int
clGetPlatformInfo(cl_platform_id platform, cl_platform_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetPlatformInfo;
if (func != nullptr) {
return func(platform, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Device APIs
CL_API_ENTRY cl_int clGetDeviceIDs(
cl_platform_id platform, cl_device_type device_type, cl_uint num_entries,
cl_device_id *devices, cl_uint *num_devices) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetDeviceIDs;
if (func != nullptr) {
return func(platform, device_type, num_entries, devices, num_devices);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int
clGetDeviceInfo(cl_device_id device, cl_device_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetDeviceInfo;
if (func != nullptr) {
return func(device, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clRetainDevice(cl_device_id device)
CL_API_SUFFIX__VERSION_1_2 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainDevice;
if (func != nullptr) {
return func(device);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseDevice(cl_device_id device)
CL_API_SUFFIX__VERSION_1_2 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseDevice;
if (func != nullptr) {
return func(device);
} else {
return CL_INVALID_PLATFORM;
}
}
// Context APIs
CL_API_ENTRY cl_context clCreateContext(
const cl_context_properties *properties, cl_uint num_devices,
const cl_device_id *devices,
void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateContext;
if (func != nullptr) {
return func(properties, num_devices, devices, pfn_notify, user_data,
errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_context clCreateContextFromType(
const cl_context_properties *properties, cl_device_type device_type,
void(CL_CALLBACK *pfn_notify)(const char *, const void *, size_t, void *),
void *user_data, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateContextFromType;
if (func != nullptr) {
return func(properties, device_type, pfn_notify, user_data, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_int clRetainContext(cl_context context)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainContext;
if (func != nullptr) {
return func(context);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseContext(cl_context context)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseContext;
if (func != nullptr) {
return func(context);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int
clGetContextInfo(cl_context context, cl_context_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetContextInfo;
if (func != nullptr) {
return func(context, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Program Object APIs
CL_API_ENTRY cl_program clCreateProgramWithSource(
cl_context context, cl_uint count, const char **strings,
const size_t *lengths, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateProgramWithSource;
if (func != nullptr) {
return func(context, count, strings, lengths, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_program clCreateProgramWithBinary(
cl_context context, cl_uint num_devices, const cl_device_id *device_list,
const size_t *lengths, const unsigned char **binaries,
cl_int *binary_status, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateProgramWithBinary;
if (func != nullptr) {
return func(context, num_devices, device_list, lengths, binaries,
binary_status, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_int
clGetProgramInfo(cl_program program, cl_program_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetProgramInfo;
if (func != nullptr) {
return func(program, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clGetProgramBuildInfo(
cl_program program, cl_device_id device, cl_program_build_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetProgramBuildInfo;
if (func != nullptr) {
return func(program, device, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clRetainProgram(cl_program program)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainProgram;
if (func != nullptr) {
return func(program);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseProgram(cl_program program)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseProgram;
if (func != nullptr) {
return func(program);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clBuildProgram(
cl_program program, cl_uint num_devices, const cl_device_id *device_list,
const char *options,
void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data),
void *user_data) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clBuildProgram;
if (func != nullptr) {
return func(program, num_devices, device_list, options, pfn_notify,
user_data);
} else {
return CL_INVALID_PLATFORM;
}
}
// Kernel Object APIs
CL_API_ENTRY cl_kernel
clCreateKernel(cl_program program, const char *kernel_name,
cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateKernel;
if (func != nullptr) {
return func(program, kernel_name, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_int clRetainKernel(cl_kernel kernel)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainKernel;
if (func != nullptr) {
return func(kernel);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseKernel(cl_kernel kernel)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseKernel;
if (func != nullptr) {
return func(kernel);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clSetKernelArg(cl_kernel kernel, cl_uint arg_index,
size_t arg_size, const void *arg_value)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clSetKernelArg;
if (func != nullptr) {
return func(kernel, arg_index, arg_size, arg_value);
} else {
return CL_INVALID_PLATFORM;
}
}
// Memory Object APIs
CL_API_ENTRY cl_mem
clCreateBuffer(cl_context context, cl_mem_flags flags, size_t size,
void *host_ptr, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateBuffer;
if (func != nullptr) {
return func(context, flags, size, host_ptr, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_mem clCreateImage(
cl_context context, cl_mem_flags flags, const cl_image_format *image_format,
const cl_image_desc *image_desc, void *host_ptr,
cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_2 {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateImage;
if (func != nullptr) {
return func(context, flags, image_format, image_desc, host_ptr,
errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_int clRetainMemObject(cl_mem memobj)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainMemObject;
if (func != nullptr) {
return func(memobj);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseMemObject(cl_mem memobj)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseMemObject;
if (func != nullptr) {
return func(memobj);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clGetImageInfo(cl_mem image, cl_image_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetImageInfo;
if (func != nullptr) {
return func(image, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Command Queue APIs
CL_API_ENTRY cl_command_queue clCreateCommandQueueWithProperties(
cl_context context, cl_device_id device,
const cl_queue_properties *properties,
cl_int *errcode_ret) CL_API_SUFFIX__VERSION_2_0 {
auto func =
paddle::lite::OpenCLLibrary::Get()->clCreateCommandQueueWithProperties;
if (func != nullptr) {
return func(context, device, properties, errcode_ret);
} else {
// Fix MediaTek MT6771 OpenCL driver breakage
VLOG(3) << "Fallback to clCreateCommandQueue";
if (properties[0] == CL_QUEUE_PROPERTIES) {
// When calling with OpenCL-CLHPP, the 2nd param is provided by caller.
#pragma GCC diagnostic push // disable warning both for clang and gcc
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
return clCreateCommandQueue(context, device, properties[1], errcode_ret);
#pragma GCC diagnostic pop
} else {
LOG(FATAL) << "Unknown calling parameters, check the code here";
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
}
CL_API_ENTRY cl_int clRetainCommandQueue(cl_command_queue command_queue)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainCommandQueue;
if (func != nullptr) {
return func(command_queue);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseCommandQueue(cl_command_queue command_queue)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseCommandQueue;
if (func != nullptr) {
return func(command_queue);
} else {
return CL_INVALID_PLATFORM;
}
}
// Enqueued Commands APIs
CL_API_ENTRY cl_int clEnqueueReadBuffer(
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_read,
size_t offset, size_t size, void *ptr, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueReadBuffer;
if (func != nullptr) {
return func(command_queue, buffer, blocking_read, offset, size, ptr,
num_events_in_wait_list, event_wait_list, event);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clEnqueueReadImage(
cl_command_queue command_queue, cl_mem image, cl_bool blocking_read,
const size_t *origin, const size_t *region, size_t row_pitch,
size_t slice_pitch, void *ptr, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueReadImage;
if (func != nullptr) {
return func(command_queue, image, blocking_read, origin, region, row_pitch,
slice_pitch, ptr, num_events_in_wait_list, event_wait_list,
event);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clEnqueueWriteBuffer(
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_write,
size_t offset, size_t size, const void *ptr,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueWriteBuffer;
if (func != nullptr) {
return func(command_queue, buffer, blocking_write, offset, size, ptr,
num_events_in_wait_list, event_wait_list, event);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY void *clEnqueueMapBuffer(
cl_command_queue command_queue, cl_mem buffer, cl_bool blocking_map,
cl_map_flags map_flags, size_t offset, size_t size,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueMapBuffer;
if (func != nullptr) {
return func(command_queue, buffer, blocking_map, map_flags, offset, size,
num_events_in_wait_list, event_wait_list, event, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY void *clEnqueueMapImage(
cl_command_queue command_queue, cl_mem image, cl_bool blocking_map,
cl_map_flags map_flags, const size_t *origin, const size_t *region,
size_t *image_row_pitch, size_t *image_slice_pitch,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event, cl_int *errcode_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueMapImage;
if (func != nullptr) {
return func(command_queue, image, blocking_map, map_flags, origin, region,
image_row_pitch, image_slice_pitch, num_events_in_wait_list,
event_wait_list, event, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
CL_API_ENTRY cl_int clEnqueueUnmapMemObject(
cl_command_queue command_queue, cl_mem memobj, void *mapped_ptr,
cl_uint num_events_in_wait_list, const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueUnmapMemObject;
if (func != nullptr) {
return func(command_queue, memobj, mapped_ptr, num_events_in_wait_list,
event_wait_list, event);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clGetKernelWorkGroupInfo(
cl_kernel kernel, cl_device_id device, cl_kernel_work_group_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetKernelWorkGroupInfo;
if (func != nullptr) {
return func(kernel, device, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clEnqueueNDRangeKernel(
cl_command_queue command_queue, cl_kernel kernel, cl_uint work_dim,
const size_t *global_work_offset, const size_t *global_work_size,
const size_t *local_work_size, cl_uint num_events_in_wait_list,
const cl_event *event_wait_list,
cl_event *event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clEnqueueNDRangeKernel;
if (func != nullptr) {
return func(command_queue, kernel, work_dim, global_work_offset,
global_work_size, local_work_size, num_events_in_wait_list,
event_wait_list, event);
} else {
return CL_INVALID_PLATFORM;
}
}
// Event Object APIs
CL_API_ENTRY cl_int clWaitForEvents(
cl_uint num_events, const cl_event *event_list) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clWaitForEvents;
if (func != nullptr) {
return func(num_events, event_list);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clRetainEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clRetainEvent;
if (func != nullptr) {
return func(event);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clReleaseEvent(cl_event event) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clReleaseEvent;
if (func != nullptr) {
return func(event);
} else {
return CL_INVALID_PLATFORM;
}
}
// Event API
CL_API_ENTRY cl_int clGetEventInfo(cl_event event, cl_event_info param_name,
size_t param_value_size, void *param_value,
size_t *param_value_size_ret)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetEventInfo;
if (func != nullptr) {
return func(event, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Profiling APIs
CL_API_ENTRY cl_int clGetEventProfilingInfo(
cl_event event, cl_profiling_info param_name, size_t param_value_size,
void *param_value,
size_t *param_value_size_ret) CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clGetEventProfilingInfo;
if (func != nullptr) {
return func(event, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_INVALID_PLATFORM;
}
}
// Flush and Finish APIs
CL_API_ENTRY cl_int clFlush(cl_command_queue command_queue)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clFlush;
if (func != nullptr) {
return func(command_queue);
} else {
return CL_INVALID_PLATFORM;
}
}
CL_API_ENTRY cl_int clFinish(cl_command_queue command_queue)
CL_API_SUFFIX__VERSION_1_0 {
auto func = paddle::lite::OpenCLLibrary::Get()->clFinish;
if (func != nullptr) {
return func(command_queue);
} else {
return CL_INVALID_PLATFORM;
}
}
// Deprecated OpenCL 1.1 APIs
CL_API_ENTRY /* CL_EXT_PREFIX__VERSION_1_1_DEPRECATED */ cl_mem clCreateImage2D(
cl_context context, cl_mem_flags flags, const cl_image_format *image_format,
size_t image_width, size_t image_height, size_t image_row_pitch,
void *host_ptr,
cl_int *errcode_ret) /* CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED */ {
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateImage2D;
if (func != nullptr) {
return func(context, flags, image_format, image_width, image_height,
image_row_pitch, host_ptr, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
// Deprecated OpenCL 2.0 APIs
CL_API_ENTRY /*CL_EXT_PREFIX__VERSION_1_2_DEPRECATED*/ cl_command_queue
clCreateCommandQueue(cl_context context, cl_device_id device,
cl_command_queue_properties properties,
cl_int *errcode_ret)
/* CL_EXT_SUFFIX__VERSION_1_2_DEPRECATED */ { // NOLINT
auto func = paddle::lite::OpenCLLibrary::Get()->clCreateCommandQueue;
if (func != nullptr) {
return func(context, device, properties, errcode_ret);
} else {
if (errcode_ret != nullptr) *errcode_ret = CL_INVALID_PLATFORM;
return nullptr;
}
}
...@@ -25,6 +25,23 @@ function cmake_x86 { ...@@ -25,6 +25,23 @@ function cmake_x86 {
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
} }
function cmake_opencl {
# $1: ARM_TARGET_OS in "android" , "armlinux"
# $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
cmake .. \
-DLITE_WITH_OPENCL=ON \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
}
# This method is only called in CI. # This method is only called in CI.
function cmake_x86_for_CI { function cmake_x86_for_CI {
prepare_for_codegen # fake an empty __generated_code__.cc to pass cmake. prepare_for_codegen # fake an empty __generated_code__.cc to pass cmake.
...@@ -422,6 +439,10 @@ function main { ...@@ -422,6 +439,10 @@ function main {
cmake_x86 cmake_x86
shift shift
;; ;;
cmake_opencl)
cmake_opencl $ARM_OS $ARM_ABI
shift
;;
cmake_cuda) cmake_cuda)
cmake_cuda cmake_cuda
shift shift
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册