提交 76f4b1fc 编写于 作者: C Chunwei

Init OpenCL support for lite framework.

......@@ -150,6 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
# Lite framework backend and feature switches (X86 is the only default-on
# backend; OPENCL enables the OpenCL kernel path added by this change).
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
......@@ -181,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include(external/eigen) # download eigen3
include(ccache) # set ccache for compilation
# for opencl
if (LITE_WITH_OPENCL)
# Fetch the Khronos OpenCL C headers and the C++ bindings (cl2.hpp) as
# external projects; both are required by the lite OpenCL backend.
include(external/opencl-headers)
include(external/opencl-clhpp)
endif()
include(generic) # simplify cmake module
include(configure) # add paddle env configuration
......
......@@ -176,6 +176,10 @@ if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()
# Expose the OpenCL switch to C++ sources as a preprocessor definition.
if (LITE_WITH_OPENCL)
add_definitions("-DLITE_WITH_OPENCL")
endif()
# Expose the profiling switch to C++ sources as a preprocessor definition.
if (LITE_WITH_PROFILE)
add_definitions("-DLITE_WITH_PROFILE")
endif()
......
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)

# External project: Khronos OpenCL C++ bindings (OpenCL-CLHPP), which
# provide the CL/cl2.hpp header used by the lite OpenCL backend.
SET(OPENCL_CLHPP_SRCS_DIR ${THIRD_PARTY_PATH}/opencl-clhpp)
SET(OPENCL_CLHPP_INSTALL_DIR ${THIRD_PARTY_PATH}/install/opencl-clhpp)
SET(OPENCL_CLHPP_INCLUDE_DIR "${OPENCL_CLHPP_INSTALL_DIR}" CACHE PATH "opencl-clhpp include directory." FORCE)

INCLUDE_DIRECTORIES(${OPENCL_CLHPP_INCLUDE_DIR})

ExternalProject_Add(
    opencl_clhpp
    ${EXTERNAL_PROJECT_LOG_ARGS}  # log steps, consistent with opencl_headers
    GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-CLHPP.git"
    GIT_TAG "v2.0.10"
    PREFIX "${OPENCL_CLHPP_SRCS_DIR}"
    # Build only the header; docs/examples/tests are not needed.
    CMAKE_ARGS -DBUILD_DOCS=OFF
               -DBUILD_EXAMPLES=OFF
               -DBUILD_TESTS=OFF
               -DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)

# cl2.hpp includes the plain OpenCL C headers, so make sure they are
# downloaded first.
ADD_DEPENDENCIES(opencl_clhpp opencl_headers)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
INCLUDE(ExternalProject)
# External project: Khronos OpenCL C headers (CL/*.h), pinned to a fixed
# commit for reproducible builds. The project is header-only, so all
# configure/build/install/test steps are disabled below.
SET(OPENCL_HEADERS_SRCS_DIR ${THIRD_PARTY_PATH}/opencl-headers)
SET(OPENCL_HEADERS_INCLUDE_DIR "${OPENCL_HEADERS_SRCS_DIR}/src/opencl_headers" CACHE PATH "opencl-headers include directory." FORCE)
INCLUDE_DIRECTORIES(${OPENCL_HEADERS_INCLUDE_DIR})
ExternalProject_Add(
opencl_headers
${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY "https://github.com/KhronosGroup/OpenCL-Headers.git"
GIT_TAG "c5a4bbeabb10d8ed3d1c651b93aa31737bc473dd"
PREFIX ${OPENCL_HEADERS_SRCS_DIR}
DOWNLOAD_NAME "OpenCL-Headers"
# Header-only: nothing to configure, build, install or test.
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND ""
TEST_COMMAND ""
)
......@@ -182,6 +182,7 @@ add_subdirectory(x86)
add_subdirectory(arm)
add_subdirectory(host)
add_subdirectory(cuda)
# OpenCL runtime utilities; the subdirectory bails out early unless
# LITE_WITH_OPENCL is enabled.
add_subdirectory(opencl)
add_subdirectory(model_parser)
add_subdirectory(utils)
add_subdirectory(api)
......
# Skip this directory entirely unless the OpenCL backend is enabled.
if (NOT LITE_WITH_OPENCL)
return()
endif()
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
# Low-level OpenCL utility libraries, listed roughly in dependency order.
cc_library(cl_wrapper SRCS cl_wrapper.cc)
cc_library(cl_tool SRCS cl_tool.cc)
# Silence -Wignored-qualifiers warnings triggered inside cl_tool only.
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
cc_library(cl_half SRCS cl_half.cc)
# Half<->float conversion relies on type punning; strict aliasing must be off.
target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
# Ensure the external OpenCL C++ bindings are fetched before compiling.
add_dependencies(cl_tool opencl_clhpp)
endif()
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// Target OpenCL 2.0 for both the C API and the C++ bindings, while still
// accepting devices that only implement OpenCL 1.1. These macros must be
// defined before cl2.hpp is included, so every file in this backend should
// include this header instead of <CL/cl2.hpp> directly.
#define CL_TARGET_OPENCL_VERSION 200
#define CL_HPP_TARGET_OPENCL_VERSION 200
#define CL_HPP_MINIMUM_OPENCL_VERSION 110
#include <CL/cl2.hpp>
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include <array>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
static void CopyImageData(const CLImage& cl_image, float* out) {
int width = cl_image.image_dims()[0];
int height = cl_image.image_dims()[1];
half_t* image_data = new half_t[height * width * 4];
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
auto* converter = cl_image.image_converter();
converter->ImageToNCHW(image_data, out, cl_image.image_dims(),
cl_image.tensor_dims());
delete[] image_data;
}
// Points the global engine at the directory holding the *.cl kernel files
// and reports whether platform/device discovery succeeded.
bool InitOpenCLEngine(std::string cl_path) {
auto* engine = CLEngine::Global();
engine->set_cl_path(cl_path);
return engine->IsInitSuccess();
}
// Runs the OpenCL elementwise_add kernel: out = in + bias.
// Host float buffers are uploaded as half-float CL images, the kernel is
// launched over the input image extent, and the result is read back into
// `out`. NOTE(review): bias broadcasting semantics are defined by
// elementwise_add_kernel.cl — confirm shapes against that kernel.
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
float* bias, const DDim& bias_dim, float* out,
const DDim& out_dim) {
CLHelper helper(context);
helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
auto kernel = helper.KernelAt(0);
// Upload the input tensor as a normal-layout CL image.
CLImage in_image;
in_image.set_tensor_data(in, in_dim);
in_image.InitNormalCLImage(helper.OpenCLContext());
VLOG(3) << " --- Inpu image: " << in_image << " --- ";
// Upload the bias tensor the same way.
CLImage bias_image;
bias_image.set_tensor_data(bias, bias_dim);
bias_image.InitNormalCLImage(helper.OpenCLContext());
VLOG(3) << " --- Bias image: " << bias_image << " --- ";
// Output image is allocated uninitialized; the kernel fills it.
CLImage out_image;
out_image.InitEmptyImage(helper.OpenCLContext(), out_dim);
cl_int status;
status = kernel.setArg(0, *in_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(1, *bias_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(2, *out_image.cl_image());
CL_CHECK_ERRORS(status);
// One work-item per image texel; local work size is left to the driver.
size_t width = in_image.ImageWidth();
size_t height = in_image.ImageHeight();
auto global_work_size = cl::NDRange{width, height};
status = helper.OpenCLCommandQueue().enqueueNDRangeKernel(
kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr);
CL_CHECK_ERRORS(status);
VLOG(3) << " --- Out image: " << out_image << " --- ";
// Blocking read-back into the caller-provided buffer.
CopyImageData(out_image, out);
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
namespace paddle {
namespace lite {
// Initializes the global CLEngine with the kernel directory `cl_path`;
// returns true when a usable OpenCL platform and GPU device were found.
bool InitOpenCLEngine(std::string cl_path);
// Computes out = in + bias on the GPU (host float buffers, shapes given
// by the DDim arguments). Blocks until the result is read back.
void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
float* bias, const DDim& bias_dim, float* out,
const DDim& out_dim);
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <glog/logging.h>
#include <memory>
#include <string>
#include <utility>
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
// Command queue shared through the global CLEngine singleton.
cl::CommandQueue &CLContext::GetCommandQueue() {
return CLEngine::Global()->command_queue();
}
// OpenCL context shared through the global CLEngine singleton.
cl::Context &CLContext::GetContext() { return CLEngine::Global()->context(); }
// Returns a cached program for (file_name, options), building it on first
// use. The cache key is the file name with the build options appended, so
// the same source built with different options yields distinct entries.
cl::Program &CLContext::GetProgram(const std::string &file_name,
const std::string &options) {
std::string program_key = file_name;
if (!options.empty()) {
program_key += options;
}
auto it = programs_.find(program_key);
if (it != programs_.end()) {
VLOG(3) << " --- program -> " << program_key << " has been built --- ";
return *(it->second);
}
// Not cached: load kernel source from <cl_path>/cl_kernel/<file_name>
// and compile it with the requested options.
auto program = CLEngine::Global()->CreateProgram(
GetContext(), CLEngine::Global()->cl_path() + "/cl_kernel/" + file_name);
VLOG(3) << " --- begin build program -> " << program_key << " --- ";
CLEngine::Global()->BuildProgram(program.get(), options);
VLOG(3) << " --- end build program -> " << program_key << " --- ";
programs_[program_key] = std::move(program);
return *(programs_[program_key]);
}
// Builds (or fetches the cached) program for `file_name`/`options`, then
// creates and returns the kernel named `kernel_name` from it.
std::unique_ptr<cl::Kernel> CLContext::GetKernel(const std::string &kernel_name,
                                                 const std::string &file_name,
                                                 const std::string &options) {
  cl_int status{CL_SUCCESS};
  VLOG(3) << " --- to get program " << file_name << " --- ";
  auto program = GetProgram(file_name, options);
  VLOG(3) << " --- end get program --- ";
  VLOG(3) << " --- to create kernel: " << kernel_name << " --- ";
  std::unique_ptr<cl::Kernel> kernel(
      new cl::Kernel(program, kernel_name.c_str(), &status));
  CL_CHECK_ERRORS(status);
  VLOG(3) << " --- end create kernel --- ";
  // Return the local directly: `return std::move(kernel)` is redundant for
  // a local object and defeats copy elision (NRVO).
  return kernel;
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <unordered_map>
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {
// Caches built cl::Program objects (keyed by file name + build options)
// and creates kernels from them. The queue and context themselves come
// from the global CLEngine singleton.
class CLContext {
public:
cl::CommandQueue &GetCommandQueue();
cl::Context &GetContext();
// Returns a cached program, building it on first request.
cl::Program &GetProgram(const std::string &file_name,
const std::string &options);
// Creates kernel `kernel_name` from the (possibly cached) program.
std::unique_ptr<cl::Kernel> GetKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options);
private:
// program cache, keyed by file name + options
std::unordered_map<std::string, std::unique_ptr<cl::Program>> programs_;
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
// Singleton accessor. Init() is invoked on every call but returns early
// once initialized_ is set. NOTE(review): no synchronization here —
// confirm Global() is first reached from a single thread.
CLEngine* CLEngine::Global() {
static CLEngine cl_engine_;
cl_engine_.Init();
return &cl_engine_;
}
// Drains pending work, then releases OpenCL objects in reverse creation
// order: queue -> context -> device -> platform.
CLEngine::~CLEngine() {
if (command_queue_ != nullptr) {
command_queue_->finish();
}
// For controlling the destruction order:
command_queue_.reset();
context_.reset();
device_.reset();
platform_.reset();
}
// One-shot platform/device discovery. Always returns true after the first
// call regardless of outcome; use IsInitSuccess() to learn whether
// discovery actually found a usable platform and device.
bool CLEngine::Init() {
if (initialized_) {
return true;
}
bool is_platform_init = InitializePlatform();
bool is_device_init = InitializeDevice();
is_init_success_ = is_platform_init && is_device_init;
initialized_ = true;
return initialized_;
}
// Platform accessor; requires InitializePlatform() to have succeeded.
cl::Platform& CLEngine::platform() {
CHECK(platform_ != nullptr) << "platform_ is not initialized!";
return *platform_;
}
// Lazily creates the cl::Context on first use.
cl::Context& CLEngine::context() {
if (context_ == nullptr) {
context_ = CreateContext();
}
return *context_;
}
// Device accessor; requires InitializeDevice() to have succeeded.
cl::Device& CLEngine::device() {
CHECK(device_ != nullptr) << "device_ is not initialized!";
return *device_;
}
// Lazily creates the command queue (and, transitively, the context).
cl::CommandQueue& CLEngine::command_queue() {
if (command_queue_ == nullptr) {
command_queue_ = CreateCommandQueue(context());
}
return *command_queue_;
}
// Loads the OpenCL C source at `file_name` and wraps it in a new (unbuilt)
// cl::Program for `context`. Aborts via CHECK when the file is missing or
// empty; status_ records the program-creation error code.
std::unique_ptr<cl::Program> CLEngine::CreateProgram(const cl::Context& context,
                                                     std::string file_name) {
  // Open at end (std::ios::ate) so tellg() yields the file size directly.
  std::ifstream file{file_name, std::ios::binary | std::ios::ate};
  CHECK(file.is_open()) << "Can't open file from " << file_name;
  auto size = file.tellg();
  CHECK(size > 0) << "size is too small.";
  std::string content(size, '\0');
  file.seekg(0);
  file.read(&content[0], size);
  cl::Program::Sources sources;
  sources.push_back(content);
  auto prog =
      std::unique_ptr<cl::Program>(new cl::Program(context, sources, &status_));
  LOG(INFO) << "OpenCL kernel file name: " << file_name;
  LOG(INFO) << "Program source size: " << content.size();
  CL_CHECK_ERRORS(status_);
  // Plain return of the local enables NRVO/implicit move; wrapping it in
  // std::move is redundant and blocks elision.
  return prog;
}
// Creates a user event in `context`; status_ records the creation result.
std::unique_ptr<cl::UserEvent> CLEngine::CreateEvent(
    const cl::Context& context) {
  auto event =
      std::unique_ptr<cl::UserEvent>(new cl::UserEvent(context, &status_));
  CL_CHECK_ERRORS(status_);
  // Plain return of the local enables NRVO/implicit move; std::move here
  // is redundant.
  return event;
}
// Builds `program` for the selected device. Appends -cl-fast-relaxed-math
// and an -I flag pointing at the kernel directory so kernels can #include
// shared helpers. On failure the device build log is dumped and false is
// returned.
bool CLEngine::BuildProgram(cl::Program* program, const std::string& options) {
std::string build_option = options + " -cl-fast-relaxed-math -I " +
CLEngine::Global()->cl_path() + "/cl_kernel";
status_ = program->build({*device_}, build_option.c_str());
CL_CHECK_ERRORS(status_);
if (status_ != CL_SUCCESS) {
// Only a genuine compile error produces a useful build log.
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(device()) ==
CL_BUILD_ERROR) {
std::string log = program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(device());
LOG(INFO) << "Program build error: " << log;
}
return false;
}
return true;
}
// Enumerates the available OpenCL platforms and selects the first one.
// Returns false (after logging) when none is present.
bool CLEngine::InitializePlatform() {
  std::vector<cl::Platform> all_platforms;
  status_ = cl::Platform::get(&all_platforms);
  CL_CHECK_ERRORS(status_);
  if (all_platforms.empty()) {
    LOG(ERROR) << "No OpenCL platform found!";
    return false;
  }
  // Construct the shared platform directly from the chosen element rather
  // than default-constructing and then assigning.
  platform_ = std::make_shared<cl::Platform>(all_platforms[0]);
  return true;
}
// Enumerates GPU devices on the selected platform, picks the first one and
// verifies the capabilities this backend depends on: image support (the
// tensor layout is image-based) and the cl_khr_fp16 extension (data is
// stored as half floats). Returns false when any requirement is missing.
bool CLEngine::InitializeDevice() {
  std::vector<cl::Device> all_devices;
  status_ = platform_->getDevices(CL_DEVICE_TYPE_GPU, &all_devices);
  CL_CHECK_ERRORS(status_);
  if (all_devices.empty()) {
    LOG(ERROR) << "No OpenCL GPU device found!";
    return false;
  }
  // Construct directly from the selected device rather than
  // default-constructing and then assigning.
  device_ = std::make_shared<cl::Device>(all_devices[0]);
  auto device_name = device_->getInfo<CL_DEVICE_NAME>();
  LOG(INFO) << "Using device: " << device_name;
  // Image support is mandatory for the image-based tensor layout.
  auto image_support = device_->getInfo<CL_DEVICE_IMAGE_SUPPORT>();
  if (image_support) {
    LOG(INFO) << "The chosen device supports image processing.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support image processing!";
    return false;
  }
  // Half-precision storage requires the cl_khr_fp16 extension.
  auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
  LOG(INFO) << "The extensions supported by this device: " << ext_data;
  if (ext_data.find("cl_khr_fp16") != std::string::npos) {
    LOG(INFO) << "The chosen device supports the half data type.";
  } else {
    LOG(ERROR) << "The chosen device doesn't support the half data type!";
    return false;
  }
  // Informational logging only: compute units and local memory size.
  auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  LOG(INFO) << "The chosen device has " << max_units << " compute units.";
  auto local_mem = device_->getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
  LOG(INFO) << "The local memory size of the chosen device is "
            << static_cast<float>(local_mem) / 1024 << " KB.";
  return true;
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <fstream>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
// Process-wide owner of the OpenCL platform, device, context and command
// queue, plus helpers for loading and building kernel programs. Access it
// only through Global().
class CLEngine {
public:
static CLEngine* Global();
// Runs platform/device discovery once; see IsInitSuccess() for outcome.
bool Init();
cl::Platform& platform();
cl::Context& context();
cl::Device& device();
cl::CommandQueue& command_queue();
// Loads OpenCL C source from `file_name` into a new (unbuilt) program.
std::unique_ptr<cl::Program> CreateProgram(const cl::Context& context,
std::string file_name);
// Creates a user event in `context`.
std::unique_ptr<cl::UserEvent> CreateEvent(const cl::Context& context);
// Compiles `program` for the device; returns false on build errors.
bool BuildProgram(cl::Program* program, const std::string& options = "");
// True when Init() found a usable platform and device.
bool IsInitSuccess() { return is_init_success_; }
// Directory that holds the *.cl kernel sources.
std::string cl_path() { return cl_path_; }
void set_cl_path(std::string cl_path) { cl_path_ = cl_path; }
private:
CLEngine() = default;
~CLEngine();
bool InitializePlatform();
bool InitializeDevice();
// Builds a context over the single selected device.
std::shared_ptr<cl::Context> CreateContext() {
auto context = std::make_shared<cl::Context>(
std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr, &status_);
CL_CHECK_ERRORS(status_);
return context;
}
// In-order command queue (properties = 0) on the selected device.
std::shared_ptr<cl::CommandQueue> CreateCommandQueue(
const cl::Context& context) {
auto queue =
std::make_shared<cl::CommandQueue>(context, device(), 0, &status_);
CL_CHECK_ERRORS(status_);
return queue;
}
std::string cl_path_;  // kernel source directory
std::shared_ptr<cl::Platform> platform_{nullptr};
std::shared_ptr<cl::Context> context_{nullptr};
std::shared_ptr<cl::Device> device_{nullptr};
std::shared_ptr<cl::CommandQueue> command_queue_{nullptr};
cl_int status_{CL_SUCCESS};  // last OpenCL API status
bool initialized_{false};  // Init() has run at least once
bool is_init_success_{false};  // discovery found a usable platform+device
};
} // namespace lite
} // namespace paddle
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
namespace paddle {
namespace lite {
// 16-bit storage type for OpenCL half-precision floats.
typedef uint16_t half_t;
// Scalar float <-> half conversions.
half_t Float2Half(float f);
float Half2Float(half_t h);
// Bulk conversions over `count` elements between preallocated arrays.
void FloatArray2HalfArray(float *f_array, half_t *h_array, int count);
void HalfArray2FloatArray(half_t *h_array, float *f_array, int count);
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include <glog/logging.h>
#include <string>
#include <utility>
#include <vector>
namespace paddle {
namespace lite {
// Binds this helper to a CLContext; must precede any other call.
void CLHelper::set_context(CLContext *context) { context_ = context; }
// Compiles (or fetches the cached) program `file_name` and appends the
// kernel named `kernel_name` to the list; retrieve it with KernelAt().
void CLHelper::AddKernel(const std::string &kernel_name,
const std::string &file_name,
const std::string &options) {
CHECK(context_ != nullptr) << "Please use set_context first!";
VLOG(3) << " --- begin to add kernel ---";
auto kernel = context_->GetKernel(kernel_name, file_name, options);
kernels.emplace_back(std::move(kernel));
VLOG(3) << " --- end to add kernel --- ";
}
// Bounds-checked access to a kernel previously registered via AddKernel().
cl::Kernel &CLHelper::KernelAt(const int index) {
VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
CHECK(static_cast<size_t>(index) < kernels.size())
<< "The index must be less than the size of kernels.";
CHECK(kernels[index] != nullptr)
<< "The target kernel pointer cannot be null.";
return *(kernels[index]);
}
// Command queue of the bound context.
cl::CommandQueue &CLHelper::OpenCLCommandQueue() {
CHECK(context_ != nullptr) << "Please use set_context first!";
return context_->GetCommandQueue();
}
// OpenCL context object of the bound context wrapper.
cl::Context &CLHelper::OpenCLContext() {
CHECK(context_ != nullptr) << "Please use set_context first!";
return context_->GetContext();
}
// Derives a 3-D global work size from the image's tensor rank.
// NOTE(review): the 4-D branch assumes ImageWidth() is an exact multiple
// of w — confirm against the image converters' layout.
cl::NDRange CLHelper::DefaultWorkSize(const CLImage &image) {
// n c h w
auto image_dim = image.tensor_dims();
if (image_dim.size() == 4) {
auto n = image_dim[0];
auto h = image_dim[2];
auto w = image_dim[3];
auto image_width = image.ImageWidth();
auto work_size_0 = image_width / w;
auto work_size_1 = w;
auto work_size_2 = n * h;
return cl::NDRange{static_cast<size_t>(work_size_0),
static_cast<size_t>(work_size_1),
static_cast<size_t>(work_size_2)};
} else if (image_dim.size() == 2) {
// 2-D tensor: one work-item per image texel.
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(image.ImageHeight())};
} else if (image_dim.size() == 1) {
return cl::NDRange{static_cast<size_t>(1),
static_cast<size_t>(image.ImageWidth()),
static_cast<size_t>(1)};
} else if (image_dim.size() == 3) {
// 3-D tensor (c, h, w): channels are packed 4-per-texel.
auto c = image_dim[0];
auto h = image_dim[1];
auto w = image_dim[2];
return cl::NDRange{static_cast<size_t>((c + 3) / 4), static_cast<size_t>(w),
static_cast<size_t>(h)};
} else {
LOG(FATAL) << "Not support this dimension, need to be implemented!";
return cl::NDRange{};
}
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
namespace paddle {
namespace lite {
// Convenience wrapper around CLContext: keeps the list of kernels created
// for an operator and computes default ND ranges for launches.
class CLHelper {
public:
CLHelper() = default;
explicit CLHelper(CLContext *context) : context_(context) {}
void set_context(CLContext *context);
// Builds `file_name` (cached in the context) and appends the kernel.
void AddKernel(const std::string &kernel_name, const std::string &file_name,
const std::string &options = "");
cl::Kernel &KernelAt(const int index);
cl::CommandQueue &OpenCLCommandQueue();
cl::Context &OpenCLContext();
// 3-D global work size derived from the image's tensor rank.
cl::NDRange DefaultWorkSize(const CLImage &image);
private:
CLContext *context_{nullptr};  // not owned
std::vector<std::unique_ptr<cl::Kernel>> kernels;
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image.h"
#include <glog/logging.h>
#include <array>
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
// Debug printer: blocking-reads the image back to the host, converts the
// half-float RGBA layout to NCHW floats and streams the tensor dims plus
// roughly 20 evenly sampled values.
std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
int width = cl_image.image_dims_[0];
int height = cl_image.image_dims_[1];
// Staging buffer: 4 half values (RGBA) per image texel.
half_t* image_data = new half_t[height * width * 4];
cl::Image* image = cl_image.cl_image();
const std::array<size_t, 3> origin{0, 0, 0};
const std::array<size_t, 3> region{static_cast<size_t>(width),
static_cast<size_t>(height), 1};
cl_int err = CLEngine::Global()->command_queue().enqueueReadImage(
*image, CL_TRUE, origin, region, 0, 0, image_data, nullptr, nullptr);
CL_CHECK_ERRORS(err);
float* tensor_data = new float[cl_image.numel()];
auto* converter = cl_image.image_converter();
converter->ImageToNCHW(image_data, tensor_data, cl_image.image_dims_,
cl_image.tensor_dims_);
// Sample at most ~20 elements to keep the log output short.
int stride = cl_image.numel() / 20;
stride = stride > 0 ? stride : 1;
os << " dims: " << cl_image.tensor_dims_ << "\n";
for (int i = 0; i < cl_image.numel(); i += stride) {
os << tensor_data[i] << " ";
}
delete[] tensor_data;
delete[] image_data;
return os;
}
// Copies `numel` floats from `tensor_data` into an internally owned buffer
// and records the tensor shape; the caller's buffer is not retained.
// NOTE(review): tensor_data_ appears to be std::unique_ptr<float> (header)
// but is reset with new[], which frees with delete instead of delete[]
// (undefined behavior) — should be std::unique_ptr<float[]>. Confirm and
// fix the member declaration.
void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// The light-weight DDim spells the element count "product()".
auto numel = dim.product();
#else
auto numel = dim.production();
#endif
tensor_data_.reset(new float[numel]);
memcpy(tensor_data_.get(), tensor_data, numel * sizeof(float));
tensor_dims_ = dim;
}
// Uploads the staged tensor data using the folder converter (the default
// layout when no explicit variant is requested).
void CLImage::InitCLImage(const cl::Context& context) {
  // Fix: the original CHECK message had unrelated code pasted into the
  // string ("set_tensohelper->DefaultWorkSize(out_image)r_data").
  CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
  image_converter_.reset(new CLImageConverterFolder);
  InitCLImage(context, image_converter_.get());
}
// Uploads the staged tensor data using the normal (non-folded) layout.
void CLImage::InitNormalCLImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
image_converter_.reset(new CLImageConverterNormal);
InitCLImage(context, image_converter_.get());
}
// Uploads a 4-D tensor using the N/W-block layout.
void CLImage::InitNImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
image_converter_.reset(new CLImageConverterNWBlock);
InitCLImage(context, image_converter_.get());
}
// Uploads a 4-D tensor using the depthwise-block layout.
void CLImage::InitDWImage(const cl::Context& context) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
CHECK(tensor_dims_.size() == 4) << " Tensor dim is not 4.";
image_converter_.reset(new CLImageConverterDWBlock);
InitCLImage(context, image_converter_.get());
}
// Allocates an uninitialized device image sized for tensor shape `dim`
// (normal layout). Must not be combined with set_tensor_data.
void CLImage::InitEmptyImage(const cl::Context& context, const DDim& dim) {
CHECK(tensor_data_ == nullptr)
<< " Empty image tensor data shouldn't have value";
tensor_dims_ = dim;
image_converter_.reset(new CLImageConverterNormal);
VLOG(3) << " to get image dims ";
image_dims_ = image_converter_->InitImageDimInfoWith(tensor_dims_);
VLOG(3) << " end get image dims " << image_dims_;
// nullptr data -> device allocation only, no host copy.
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image ";
}
// Allocates an uninitialized device image with an explicit IMAGE shape
// (width/height), bypassing the tensor-to-image dim computation;
// tensor_dims_ is left untouched.
void CLImage::InitEmptyWithImageDim(const cl::Context& context,
const DDim& image_dims) {
VLOG(3) << " to get image dims ";
image_dims_ = image_dims;
VLOG(3) << " end get image dims " << image_dims_;
InitCLImage(context, image_dims_[0], image_dims_[1], nullptr);
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image";
}
// Converts the staged tensor data into `converter`'s image layout, uploads
// it to a new device image, then releases the host-side tensor copy.
void CLImage::InitCLImage(const cl::Context& context,
CLImageConverterBase* converter) {
CHECK(tensor_data_ != nullptr) << " Please call set_tensor_data first!";
VLOG(3) << " begin init cl image ";
image_dims_ = converter->InitImageDimInfoWith(tensor_dims_);
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
// 4 half values (RGBA channels) per image texel.
half_t* image_data = new half_t[image_dims_.product() * 4];
#else
half_t* image_data = new half_t[image_dims_.production() * 4];
#endif
VLOG(3) << " convert to image ";
converter->NCHWToImage(tensor_data_.get(), image_data, tensor_dims_);
VLOG(3) << " end convert to image ";
InitCLImage(context, image_dims_[0], image_dims_[1], image_data);
delete[] image_data;
// The device now owns the data; drop the host staging copy.
tensor_data_ = nullptr;
cl_event_ = CLEngine::Global()->CreateEvent(context);
initialized_ = true;
VLOG(3) << " end init cl image ";
}
// Creates the underlying cl::Image2D (RGBA, half float). When `data` is
// non-null it is copied to the device via CL_MEM_COPY_HOST_PTR; otherwise
// the image is allocated uninitialized.
void CLImage::InitCLImage(const cl::Context& context, int width, int height,
void* data) {
cl::ImageFormat img_format(CL_RGBA, CL_HALF_FLOAT);
cl_int err;
cl_image_.reset(new cl::Image2D(
context, CL_MEM_READ_WRITE | (data ? CL_MEM_COPY_HOST_PTR : 0),
img_format, width, height, 0, data, &err));
CL_CHECK_ERRORS(err);
CHECK(err == CL_SUCCESS) << " Create image 2d error.";
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <iostream>
#include <memory>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl2_header.h"
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
namespace paddle {
namespace lite {
// Wrapper around a cl::Image2D plus the tensor metadata needed to convert
// between host-side NCHW float tensors and device-side RGBA half-float
// images. Typical flow: set_tensor_data() -> one of the Init*Image()
// overloads; after initialization the host copy is released.
class CLImage {
  // For debug
  friend std::ostream& operator<<(std::ostream& os, const CLImage& image);

 public:
  CLImage() = default;

  /*
   * Will not hold input tensor data, memcpy in this method.
   * */
  void set_tensor_data(float* tensor_data, const DDim& dim);

  // True once one of the Init*Image() methods has completed.
  bool IsInit() { return initialized_; }

  /*
   * Need call set_tensor_data first.
   * Folder when one dim or two dim.
   * */
  void InitCLImage(const cl::Context& context);

  void InitNormalCLImage(const cl::Context& context);

  void InitNImage(const cl::Context& context);

  void InitDWImage(const cl::Context& context);

  void InitEmptyImage(const cl::Context& context, const DDim& dim);

  void InitEmptyWithImageDim(const cl::Context& context,
                             const DDim& image_dims);

  cl::Image* cl_image() const { return cl_image_.get(); }

  const DDim& image_dims() const { return image_dims_; }

  inline size_t ImageWidth() const { return image_dims_[0]; }

  inline size_t ImageHeight() const { return image_dims_[1]; }

  const DDim& tensor_dims() const { return tensor_dims_; }

  /*
   * Resize original tensor dim.
   * */
  inline CLImage& Resize(const DDim& dims) {
    tensor_dims_ = dims;
    return *this;
  }

  // Host-side tensor data; only valid before the image is initialized (the
  // host copy is released once the data lives on the device).
  template <typename T>
  T* data() const {
    CHECK(!initialized_) << "CL image has initialized, tensor data has been "
                            "deleted, can't use tensor data!";
    // FIX: cast the raw pointer, not the unique_ptr object itself
    // (reinterpret_cast of the smart pointer is ill-formed).
    return reinterpret_cast<T*>(tensor_data_.get());
  }

  /*
   * Numel of tensor dim
   * */
  inline int64_t numel() const {
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
    return tensor_dims_.product();
#else
    return tensor_dims_.production();
#endif
  }

  cl::UserEvent& cl_event() const { return *cl_event_; }

  CLImageConverterBase* image_converter() const {
    return image_converter_.get();
  }

 private:
  void InitCLImage(const cl::Context& context, CLImageConverterBase* converter);
  void InitCLImage(const cl::Context& context, int width, int height,
                   void* data);

  bool initialized_ = false;
  std::unique_ptr<cl::Image2D> cl_image_{nullptr};
  std::unique_ptr<cl::UserEvent> cl_event_{nullptr};
  DDim tensor_dims_;
  DDim image_dims_;
  // Array form so destruction uses delete[] (the buffer is allocated with
  // new float[...] in set_tensor_data).
  std::unique_ptr<float[]> tensor_data_{nullptr};
  std::unique_ptr<CLImageConverterBase> image_converter_{nullptr};
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_image_converter.h"
#include <glog/logging.h>
#include <vector>
namespace paddle {
namespace lite {
// Image size for the default NCHW layout: four consecutive channels share
// one RGBA pixel along x (width = W * ceil(C / 4)); batches are stacked
// along y (height = N * H).
DDim CLImageConverterDefault::InitImageDimInfoWith(const DDim &tensor_dim) {
  // Left-pad the shape to rank 4 with ones, e.g. {H, W} -> {1, 1, H, W}.
  size_t padded[4] = {1, 1, 1, 1};
  for (size_t i = 0; i < tensor_dim.size(); ++i) {
    padded[4 - tensor_dim.size() + i] = tensor_dim[i];
  }
  const size_t N = padded[0];
  const size_t C = padded[1];
  const size_t H = padded[2];
  const size_t W = padded[3];
  const size_t image_w = W * ((C + 3) / 4);
  const size_t image_h = H * N;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
// Packs an NCHW float tensor into the RGBA half-float image buffer.
// Per the commented formula below: pixel x = (c / 4) * W + w,
// pixel y = n * H + h, component = c % 4. Channel slots beyond C (padding
// up to the next multiple of 4) are zero-filled.
void CLImageConverterDefault::NCHWToImage(float *nchw, half_t *image,
                                          const DDim &tensor_dim) {
  // Left-pad the shape to rank 4 with ones, e.g. {H, W} -> {1, 1, H, W}.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;
  size_t width = in_image_dim[0];  // image width in pixels
  size_t w_block = width / W;      // number of 4-channel blocks along x
  float *p = nchw;                 // read cursor over the NCHW data
  size_t i0 = 0;                   // pixel index of this batch's first row
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;  // pixel index of this channel block's row
      for (size_t h = 0; h < H; h++) {
        // (i1 << 2) converts the pixel index to a half_t index (4 per pixel).
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            // Zero-fill the channel padding.
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}
// Inverse of NCHWToImage: reads the packed RGBA half image back into an
// NCHW float buffer. Padding channels (c >= C) are simply skipped because
// the c loop stops at C.
void CLImageConverterDefault::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  // Left-pad the shape to rank 4 with ones.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[0];
  C = new_dims[1];
  H = new_dims[2];
  W = new_dims[3];
  size_t width = image_dim[0];  // image width in pixels
  float *p = tensor;            // write cursor over the NCHW data
  size_t i0 = 0;                // pixel index of this batch's first row
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;  // pixel index of this channel block's row
      for (size_t h = 0; h < H; h++) {
        // (i1 << 2) converts the pixel index to a half_t index (4 per pixel).
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}
// Image size for the "folder" layout. Rank <= 2 tensors are folded directly
// into the image plane (four values per pixel along x); higher ranks use
// the default NCHW packing. Also records the per-block geometry consumed by
// WidthOfOneBlock()/HeightOfOneBlock()/GetCBlock().
DDim CLImageConverterFolder::InitImageDimInfoWith(const DDim &tensor_dim) {
  if (tensor_dim.size() <= 2) {
    // Treat the tensor as a [rows, cols] matrix; a 1-D tensor is one row.
    size_t rows = 1;
    size_t cols = 1;
    if (tensor_dim.size() == 1) {
      cols = tensor_dim[0];
    } else {
      rows = tensor_dim[0];
      cols = tensor_dim[1];
    }
    const size_t image_w = (cols + 3) / 4;
    const size_t image_h = rows;
    width_of_one_block_ = image_w;
    height_of_one_block_ = image_h;
    c_block_ = 1;
    return DDim(std::vector<DDim::value_type>(
        {static_cast<DDim::value_type>(image_w),
         static_cast<DDim::value_type>(image_h)}));
  }
  // Rank 3/4: identical packing to CLImageConverterDefault.
  size_t padded[4] = {1, 1, 1, 1};
  for (size_t i = 0; i < tensor_dim.size(); ++i) {
    padded[4 - tensor_dim.size() + i] = tensor_dim[i];
  }
  const size_t N = padded[0];
  const size_t C = padded[1];
  const size_t H = padded[2];
  const size_t W = padded[3];
  const size_t image_w = W * ((C + 3) / 4);
  const size_t image_h = H * N;
  width_of_one_block_ = W;
  height_of_one_block_ = H;
  c_block_ = image_w / W;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
// Packs a tensor in the folder layout. Rank 3/4 tensors delegate to the
// default converter; rank 1/2 tensors are folded so that four consecutive
// columns share one pixel.
// NOTE(review): in the rank<=2 path, padding components (when cols is not a
// multiple of 4) are left unwritten -- confirm callers zero the buffer.
void CLImageConverterFolder::NCHWToImage(float *tensor, half_t *image,
                                         const DDim &tensor_dim) {
  CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
      << " Tensor dim is not support!";
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault default_converter;
    default_converter.NCHWToImage(tensor, image, tensor_dim);
  } else {
    // View the tensor as a [rows, cols] matrix; a 1-D tensor is one row.
    size_t tdim[2] = {1, 1};
    if (tensor_dim.size() == 1) {
      tdim[1] = tensor_dim[0];
    } else {
      tdim[0] = tensor_dim[0];
      tdim[1] = tensor_dim[1];
    }
    DDim image_dim = InitImageDimInfoWith(tensor_dim);
    size_t width = image_dim[0];  // image width in pixels
    for (size_t h = 0; h < tdim[0]; h++) {
      for (size_t w = 0; w < tdim[1]; w++) {
        // Destination: pixel (w / 4, h), component w % 4.
        image[(h * width + w / 4) * 4 + (w % 4)] =
            Float2Half(tensor[h * tdim[1] + w]);
      }
    }
  }
}
// Inverse of NCHWToImage for the folder layout: rank 3/4 tensors were
// packed by the default converter; rank 1/2 tensors are unfolded from
// four-values-per-pixel rows.
void CLImageConverterFolder::ImageToNCHW(half_t *image, float *tensor,
                                         const DDim &image_dim,
                                         const DDim &tensor_dim) {
  if (tensor_dim.size() > 2) {
    CLImageConverterDefault fallback;
    fallback.ImageToNCHW(image, tensor, image_dim, tensor_dim);
    return;
  }
  const size_t pixels_per_row = image_dim[0];
  size_t rows = 1;
  size_t cols = 1;
  if (tensor_dim.size() == 2) {
    rows = tensor_dim[0];
    cols = tensor_dim[1];
  } else if (tensor_dim.size() == 1) {
    cols = tensor_dim[0];
  }
  for (size_t r = 0; r < rows; ++r) {
    for (size_t c = 0; c < cols; ++c) {
      // Source: pixel (c / 4, r), component c % 4.
      const size_t idx = (r * pixels_per_row + c / 4) * 4 + (c % 4);
      tensor[r * cols + c] = Half2Float(image[idx]);
    }
  }
}
// Image size for the N/W-blocked filter layout: four batch (N) slices share
// one RGBA pixel along x (width = W * ceil(N / 4)); channels and rows are
// stacked along y (height = C * H). Requires a rank-4 tensor.
DDim CLImageConverterNWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t N = tensor_dim[0];
  const size_t C = tensor_dim[1];
  const size_t H = tensor_dim[2];
  const size_t W = tensor_dim[3];
  const size_t image_w = W * ((N + 3) / 4);
  const size_t image_h = C * H;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
// Packs a rank-4 filter tensor with four batch (N) slices per pixel.
// Index formula: ((c * H + h) * width + (n / 4) * W + w) * 4 + n % 4, with
// width = W * ceil(N / 4). Slots with n >= N are zero-filled padding.
void CLImageConverterNWBlock::NCHWToImage(float *tensor, half_t *image,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  auto image_dim = InitImageDimInfoWith(tensor_dim);
  float *p = tensor;  // read cursor over the NCHW data
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  size_t block = image_dim[0] / tensor_dim[3];  // = ceil(N / 4)
  for (size_t n = 0; n < block * 4; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                         w * 4 + n % 4;
          if (n < N) {
            image[index] = Float2Half(*p);
            p++;
          } else {
            // Zero-fill the batch padding.
            image[index] = 0.0;
          }
          // NOTE(review): this only logs AFTER the store above; if it can
          // really trigger, the out-of-bounds write has already happened.
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}
// Inverse of NCHWToImage for the N/W-blocked layout; reads only the first N
// batch slices (padding slots are never visited because n stops at N).
void CLImageConverterNWBlock::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float *p = tensor;  // write cursor over the NCHW data
  size_t N = tensor_dim[0];
  size_t C = tensor_dim[1];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];
  size_t height = image_dim[1];
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      for (size_t h = 0; h < H; ++h) {
        for (size_t w = 0; w < W; ++w) {
          // Same index formula as NCHWToImage.
          size_t index = 4 * c * (width * H) + 4 * h * width + 4 * W * (n / 4) +
                         w * 4 + n % 4;
          *p = Half2Float(image[index]);
          p++;
          // NOTE(review): diagnostic only; reaching here means the read above
          // was already out of bounds.
          if (index >= (width * height * 4)) {
            LOG(INFO) << " index out of range ";
          }
        }
      }
    }
  }
  VLOG(3) << " init done";
}
// Image size for the depth-wise filter layout. The arithmetic mirrors the
// NW-block converter: width = W * ceil(dims[0] / 4), height = dims[1] * H.
// Requires a rank-4 tensor.
DDim CLImageConverterDWBlock::InitImageDimInfoWith(const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t N = tensor_dim[0];
  const size_t C = tensor_dim[1];
  const size_t H = tensor_dim[2];
  const size_t W = tensor_dim[3];
  const size_t image_w = W * ((N + 3) / 4);
  const size_t image_h = C * H;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
// Packs a tensor for depth-wise filters. Note the deliberate swap relative
// to the default converter: N = dims[1] and C = dims[0], so the blocking
// runs along the tensor's first dimension. Slots with c >= C are
// zero-filled padding.
void CLImageConverterDWBlock::NCHWToImage(float *tensor, half_t *image,
                                          const DDim &tensor_dim) {
  // Left-pad the shape to rank 4 with ones.
  size_t new_dims[] = {1, 1, 1, 1};
  for (size_t j = 0; j < tensor_dim.size(); ++j) {
    new_dims[4 - tensor_dim.size() + j] = tensor_dim[j];
  }
  size_t N, C, H, W;
  N = new_dims[1];  // swapped on purpose (see function comment)
  C = new_dims[0];
  H = new_dims[2];
  W = new_dims[3];
  DDim in_image_dim = InitImageDimInfoWith(tensor_dim);
  VLOG(3) << " tensor dim: " << tensor_dim;
  VLOG(3) << " image dim: " << in_image_dim;
  size_t width = in_image_dim[0];  // image width in pixels
  size_t w_block = width / W;      // number of 4-wide blocks along x
  float *p = tensor;               // read cursor over the tensor data
  size_t i0 = 0;                   // pixel index of this n's first row
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < w_block * 4; c++) {
      size_t i1 = i0 + (c / 4) * W;  // pixel index of this block's row
      for (size_t h = 0; h < H; h++) {
        // (i1 << 2) converts the pixel index to a half_t index (4 per pixel).
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          if (c < C) {
            // size_t x = (n * width * H + h * width + (c / 4) * W + w) * 4 +
            // (c % 4);
            image[i2] = Float2Half(*p);
            i2 += 4;
            p++;
          } else {
            // Zero-fill the padding.
            image[i2] = 0.0;
            i2 += 4;
          }
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}
// Inverse of NCHWToImage for the depth-wise layout; uses the same N/C swap
// (N = dims[1], C = dims[0]) and skips padding by stopping the c loop at C.
void CLImageConverterDWBlock::ImageToNCHW(half_t *image, float *tensor,
                                          const DDim &image_dim,
                                          const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  float *p = tensor;  // write cursor over the tensor data
  size_t N = tensor_dim[1];  // swapped on purpose (see function comment)
  size_t C = tensor_dim[0];
  size_t H = tensor_dim[2];
  size_t W = tensor_dim[3];
  size_t width = image_dim[0];  // image width in pixels
  size_t i0 = 0;                // pixel index of this n's first row
  for (size_t n = 0; n < N; n++) {
    for (size_t c = 0; c < C; c++) {
      size_t i1 = i0 + (c / 4) * W;  // pixel index of this block's row
      for (size_t h = 0; h < H; h++) {
        // (i1 << 2) converts the pixel index to a half_t index (4 per pixel).
        size_t i2 = (i1 << 2) + c % 4;
        for (size_t w = 0; w < W; w++) {
          *p = Half2Float(image[i2]);
          i2 += 4;
          p++;
        }
        i1 += width;
      }
    }
    i0 += width * H;
  }
}
// Image size for the "normal" layout (same packing as the default
// converter: width = W * ceil(C / 4), height = N * H) while also recording
// the per-block geometry for WidthOfOneBlock()/HeightOfOneBlock()/GetCBlock().
DDim CLImageConverterNormal::InitImageDimInfoWith(const DDim &tensor_dim) {
  // Left-pad the shape to rank 4 with ones.
  size_t padded[4] = {1, 1, 1, 1};
  for (size_t i = 0; i < tensor_dim.size(); ++i) {
    padded[4 - tensor_dim.size() + i] = tensor_dim[i];
  }
  const size_t N = padded[0];
  const size_t C = padded[1];
  const size_t H = padded[2];
  const size_t W = padded[3];
  const size_t image_w = W * ((C + 3) / 4);
  const size_t image_h = H * N;
  width_of_one_block_ = W;
  height_of_one_block_ = H;
  c_block_ = image_w / W;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
void CLImageConverterNormal::NCHWToImage(float *tensor, half_t *image,
const DDim &tensor_dim) {
CHECK(tensor_dim.size() <= 4 && tensor_dim.size() > 0)
<< " Tensor dim is not support!";
CLImageConverterDefault default_converter;
default_converter.NCHWToImage(tensor, image, tensor_dim);
}
// Unpacking likewise delegates to the default converter.
void CLImageConverterNormal::ImageToNCHW(half_t *image, float *tensor,
                                         const DDim &image_dim,
                                         const DDim &tensor_dim) {
  CLImageConverterDefault delegate;
  delegate.ImageToNCHW(image, tensor, image_dim, tensor_dim);
}
// Image size for Winograd-transformed conv weights: one pixel per four
// channels along x, and 16 rows per filter along y
// (N * (wino_blk_size + 2) * (wino_blk_size + 2) with a 2x2 block size).
DDim CLImageConverterWinoTransWeight::InitImageDimInfoWith(
    const DDim &tensor_dim) {
  CHECK(tensor_dim.size() == 4) << " Tensor dim is not 4.";
  const size_t N = tensor_dim[0];
  const size_t C = tensor_dim[1];
  const size_t image_w = (C + 3) / 4;
  const size_t image_h = N * 16;
  return DDim(
      std::vector<DDim::value_type>({static_cast<DDim::value_type>(image_w),
                                     static_cast<DDim::value_type>(image_h)}));
}
// TODO(review): Winograd weight conversion is not implemented yet; these
// empty bodies only satisfy the CLImageConverterBase interface.
void CLImageConverterWinoTransWeight::NCHWToImage(float *tensor, half_t *image,
                                                  const DDim &tensor_dim) {}
void CLImageConverterWinoTransWeight::ImageToNCHW(half_t *image, float *tensor,
                                                  const DDim &image_dim,
                                                  const DDim &tensor_dim) {}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_half.h"
namespace paddle {
namespace lite {
// Abstract interface for converting between host-side NCHW float tensors
// and device-side half-float image layouts.
class CLImageConverterBase {
 public:
  virtual ~CLImageConverterBase() {}

  // Packs an NCHW float tensor into the image buffer.
  virtual void NCHWToImage(float *nchw, half_t *image,
                           const DDim &tensor_dim) = 0;
  // Unpacks the image buffer back into an NCHW float tensor.
  virtual void ImageToNCHW(half_t *image, float *nchw, const DDim &image_dim,
                           const DDim &tensor_dim) = 0;
  // Returns the 2-D image size {width, height} required for tensor_dim.
  virtual DDim InitImageDimInfoWith(const DDim &tensor_dim) = 0;
};
class CLImageConverterDefault : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *nchw, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterFolder : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
/*
* width of original tensor
* */
inline size_t WidthOfOneBlock() const { return width_of_one_block_; }
/*
* height of original tensor
* */
inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
int GetCBlock() const { return c_block_; }
private:
int c_block_;
int width_of_one_block_;
int height_of_one_block_;
};
class CLImageConverterNormal : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
/*
* width of original tensor
* */
inline size_t WidthOfOneBlock() const { return width_of_one_block_; }
/*
* height of original tensor
* */
inline size_t HeightOfOneBlock() const { return height_of_one_block_; }
int GetCBlock() const { return c_block_; }
private:
int c_block_;
int width_of_one_block_;
int height_of_one_block_;
};
class CLImageConverterNWBlock : public CLImageConverterBase {
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterDWBlock : public CLImageConverterBase {
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
class CLImageConverterWinoTransWeight : public CLImageConverterBase {
public:
DDim InitImageDimInfoWith(const DDim &tensor_dim);
void NCHWToImage(float *tensor, half_t *image, const DDim &tensor_dim);
void ImageToNCHW(half_t *image, float *tensor, const DDim &image_dim,
const DDim &tensor_dim);
};
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Fused activation used by the conv kernels; the variant is selected at
// kernel-build time via -DRELU / -DPRELU.
inline half4 activation(half4 in
#ifdef PRELU
                        ,
                        half4 prelu_alpha
#endif
                        ) {
  // FIX: default to the identity so the function never returns an
  // uninitialized value when compiled without an activation macro.
  half4 output = in;
#ifdef PRELU
  // PReLU: pass positives through, scale negatives by prelu_alpha.
  output = select(prelu_alpha * in, in, in >= (half4)0.0);
#endif
#ifdef RELU
  output = fmax(in, (half4)(0.0f));
#endif
  return output;
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Element-wise addition of two half-float images: output = input + bias.
// One work-item per pixel.
__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) {
  int x = get_global_id(0);
  int y = get_global_id(1);
  // FIX: reading with integer int2 coordinates requires an unnormalized
  // sampler -- CLK_NORMALIZED_COORDS_TRUE with int coords is undefined per
  // the OpenCL spec.
  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
  int2 coords;
  coords.x = x;
  coords.y = y;
  half4 in = read_imageh(input, sampler, coords);
  half4 biase = read_imageh(bias, sampler, coords);
  half4 output = in + biase;
  write_imageh(outputImage,coords,output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define MIN_VALUE -FLT_MAX
// Max pooling over half-float images. One work-item per output element:
// gid0 = channel block, gid1 = output column, gid2 = batch * out_height + row.
__kernel void pool_max(
    __private const int in_height, __private const int in_width,
    __private const int out_height, __private const int out_width,
    __private const int pad_top, __private const int pad_left,
    __private const int stride_h, __private const int stride_w,
    __private const int ksize_h, __private const int ksize_w,
    __read_only image2d_t input, __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  // FIX: integer coordinates require an unnormalized sampler
  // (CLK_NORMALIZED_COORDS_TRUE with int coords is undefined per the spec).
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  // Clip the pooling window against the input bounds.
  int start_h = out_h * stride_h - pad_top;
  int end_h = min(start_h + ksize_h, in_height);
  start_h = max(start_h,0);

  int start_w = out_w * stride_w - pad_left;
  int end_w = min(start_w + ksize_w, in_width);
  start_w = max(start_w,0);

  // Top-left corner of this (channel block, batch) tile inside the image.
  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 max_value = (half4)(MIN_VALUE);
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      max_value = max(max_value, tmp);
    }
  }

  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}
// Average pooling over half-float images (average excludes padding: the
// divisor is the number of in-bounds elements actually visited).
// NOTE(review): unlike pool_max, start_h/start_w are clamped BEFORE end_h /
// end_w are computed, which shifts the window at clipped borders -- confirm
// this asymmetry is intended.
__kernel void pool_avg(
    __private const int in_height, __private const int in_width,
    __private const int out_height, __private const int out_width,
    __private const int pad_top, __private const int pad_left,
    __private const int stride_h, __private const int stride_w,
    __private const int ksize_h, __private const int ksize_w,
    __read_only image2d_t input, __write_only image2d_t output) {
  const int out_c = get_global_id(0);
  const int out_w = get_global_id(1);
  const int out_nh = get_global_id(2);
  const int out_n = out_nh / out_height;
  const int out_h = out_nh % out_height;

  // FIX: integer coordinates require an unnormalized sampler
  // (CLK_NORMALIZED_COORDS_TRUE with int coords is undefined per the spec).
  const sampler_t sampler =
      CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;

  int start_h = max(out_h * stride_h - pad_top, 0);
  int end_h = min(start_h + ksize_h, in_height);
  int start_w = max(out_w * stride_w - pad_left, 0);
  int end_w = min(start_w + ksize_w, in_width);

  // Top-left corner of this (channel block, batch) tile inside the image.
  const int pos_in_x = out_c * in_width;
  const int pos_in_y = out_n * in_height;
  half4 sum = (half4)(0.0f);
  int num = 0;
  for (int y = start_h; y < end_h; ++y) {
    for (int x = start_w; x < end_w; ++x) {
      sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
      num++;
    }
  }
  // Explicit conversion instead of relying on implicit int -> half4 widening.
  half4 avg = sum / (half)num;
  const int pos_out_x = mad24(out_c, out_width, out_w);
  write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <random>
#include <vector>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/opencl/cl_caller.h"
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
namespace paddle {
namespace lite {
// Smoke test for CLEngine: initialization, lazily-created handles, and
// building a program from an on-disk kernel file.
TEST(cl_test, engine_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  // Touch the basic accessors.
  engine->platform();
  engine->device();
  engine->command_queue();
  auto& ctx = engine->context();
  const auto kernel_file =
      engine->cl_path() + "/cl_kernel/" + "elementwise_add_kernel.cl";
  auto program = engine->CreateProgram(ctx, kernel_file);
  auto event = engine->CreateEvent(ctx);
  CHECK(engine->BuildProgram(program.get()));
}
// CLContext should build kernels on demand and tolerate repeated requests
// for the same kernel.
TEST(cl_test, context_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  CLContext ctx;
  ctx.GetKernel("pool_max", "pool_kernel.cl", "");
  // Requested twice on purpose: the second call exercises repeat lookups.
  ctx.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
  ctx.GetKernel("elementwise_add", "elementwise_add_kernel.cl", "");
}
// Drives the elementwise_add kernel by hand: builds two 1024x512 input
// images, binds them plus an empty output image as kernel arguments,
// enqueues one work-item per pixel, and reports the profiled run time.
TEST(cl_test, kernel_test) {
  auto* engine = CLEngine::Global();
  CHECK(engine->IsInitSuccess());
  engine->set_cl_path(FLAGS_cl_path);
  std::unique_ptr<CLContext> context(new CLContext);
  // std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
  std::unique_ptr<CLHelper> helper(new CLHelper);
  helper->set_context(context.get());
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  helper->AddKernel("pool_max", "pool_kernel.cl");
  helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  // Index 2 is the second elementwise_add registration above.
  auto kernel = helper->KernelAt(2);
  // Input image: all ones.
  std::unique_ptr<float[]> in_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    in_data[i] = 1.f;
  }
  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage in_image;
  in_image.set_tensor_data(in_data.get(), in_dim);
  in_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << in_image;
  // Bias image: all twos.
  std::unique_ptr<float[]> bias_data(new float[1024 * 512]);
  for (int i = 0; i < 1024 * 512; i++) {
    bias_data[i] = 2.f;
  }
  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  CLImage bias_image;
  bias_image.set_tensor_data(bias_data.get(), bias_dim);
  bias_image.InitNormalCLImage(helper->OpenCLContext());
  LOG(INFO) << bias_image;
  // Output image: device-allocated, no host data.
  CLImage out_image;
  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
  LOG(INFO) << out_image;
  cl_int status;
  status = kernel.setArg(0, *in_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(1, *bias_image.cl_image());
  CL_CHECK_ERRORS(status);
  status = kernel.setArg(2, *out_image.cl_image());
  CL_CHECK_ERRORS(status);
  // auto global_work_size = helper->DefaultWorkSize(out_image);
  size_t width = in_image.ImageWidth();
  size_t height = in_image.ImageHeight();
  // One work-item per image pixel.
  auto global_work_size = cl::NDRange{width, height};
  cl::Event event;
  status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
      kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, &event);
  CL_CHECK_ERRORS(status);
  // Profiling counters are in nanoseconds; report microseconds.
  double start_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_START>();
  double stop_nanos = event.getProfilingInfo<CL_PROFILING_COMMAND_END>();
  double elapsed_micros = (stop_nanos - start_nanos) / 1000.0;
  LOG(INFO) << "Kernel Run Cost Time: " << elapsed_micros << " us.";
  LOG(INFO) << out_image;
}
// End-to-end check of the elementwise_add helper on random data; prints a
// sample of the output for manual inspection.
TEST(cl_test, elementwise_add_test) {
  std::default_random_engine rng;
  std::uniform_real_distribution<float> dist(-5, 5);
  const int kNumel = 1024 * 512;

  const DDim in_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> in_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    in_data[i] = dist(rng);
  }

  const DDim bias_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> bias_data(new float[kNumel]);
  for (int i = 0; i < kNumel; i++) {
    bias_data[i] = dist(rng);
  }

  const DDim out_dim = DDim(std::vector<DDim::value_type>{1024, 512});
  std::unique_ptr<float[]> out(new float[kNumel]);

  bool status = InitOpenCLEngine(FLAGS_cl_path);
  CHECK(status) << "Fail to initialize OpenCL engine.";
  CLContext context;
  elementwise_add(&context, in_data.get(), in_dim, bias_data.get(), bias_dim,
                  out.get(), out_dim);

  // Print every (kNumel / 20)-th element as a sanity sample.
  const int stride = kNumel / 20;
  for (int i = 0; i < kNumel; i += stride) {
    std::cout << out[i] << " ";
  }
  std::cout << std::endl;
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_tool.h"
namespace paddle {
namespace lite {
// Maps an OpenCL status code to its symbolic name (e.g. "CL_SUCCESS");
// returns "UNKNOWN ERROR CODE" for any value not listed.
const char *opencl_error_to_str(cl_int error) {
#define CASE_CL_CONSTANT(NAME) \
  case NAME:                   \
    return #NAME;
  // Suppose that no combinations are possible.
  switch (error) {
    CASE_CL_CONSTANT(CL_SUCCESS)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_FOUND)
    CASE_CL_CONSTANT(CL_DEVICE_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_COMPILER_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_OBJECT_ALLOCATION_FAILURE)
    CASE_CL_CONSTANT(CL_OUT_OF_RESOURCES)
    CASE_CL_CONSTANT(CL_OUT_OF_HOST_MEMORY)
    CASE_CL_CONSTANT(CL_PROFILING_INFO_NOT_AVAILABLE)
    CASE_CL_CONSTANT(CL_MEM_COPY_OVERLAP)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_MISMATCH)
    CASE_CL_CONSTANT(CL_IMAGE_FORMAT_NOT_SUPPORTED)
    CASE_CL_CONSTANT(CL_BUILD_PROGRAM_FAILURE)
    CASE_CL_CONSTANT(CL_MAP_FAILURE)
    CASE_CL_CONSTANT(CL_MISALIGNED_SUB_BUFFER_OFFSET)
    CASE_CL_CONSTANT(CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE_TYPE)
    CASE_CL_CONSTANT(CL_INVALID_PLATFORM)
    CASE_CL_CONSTANT(CL_INVALID_DEVICE)
    CASE_CL_CONSTANT(CL_INVALID_CONTEXT)
    CASE_CL_CONSTANT(CL_INVALID_QUEUE_PROPERTIES)
    CASE_CL_CONSTANT(CL_INVALID_COMMAND_QUEUE)
    CASE_CL_CONSTANT(CL_INVALID_HOST_PTR)
    CASE_CL_CONSTANT(CL_INVALID_MEM_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR)
    CASE_CL_CONSTANT(CL_INVALID_IMAGE_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_SAMPLER)
    CASE_CL_CONSTANT(CL_INVALID_BINARY)
    CASE_CL_CONSTANT(CL_INVALID_BUILD_OPTIONS)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM)
    CASE_CL_CONSTANT(CL_INVALID_PROGRAM_EXECUTABLE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_NAME)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_DEFINITION)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL)
    CASE_CL_CONSTANT(CL_INVALID_ARG_INDEX)
    CASE_CL_CONSTANT(CL_INVALID_ARG_VALUE)
    CASE_CL_CONSTANT(CL_INVALID_ARG_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_KERNEL_ARGS)
    CASE_CL_CONSTANT(CL_INVALID_WORK_DIMENSION)
    CASE_CL_CONSTANT(CL_INVALID_WORK_GROUP_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_WORK_ITEM_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_OFFSET)
    CASE_CL_CONSTANT(CL_INVALID_EVENT_WAIT_LIST)
    CASE_CL_CONSTANT(CL_INVALID_EVENT)
    CASE_CL_CONSTANT(CL_INVALID_OPERATION)
    CASE_CL_CONSTANT(CL_INVALID_GL_OBJECT)
    CASE_CL_CONSTANT(CL_INVALID_BUFFER_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_MIP_LEVEL)
    CASE_CL_CONSTANT(CL_INVALID_GLOBAL_WORK_SIZE)
    CASE_CL_CONSTANT(CL_INVALID_PROPERTY)
    default:
      return "UNKNOWN ERROR CODE";
  }
#undef CASE_CL_CONSTANT
}
} // namespace lite
} // namespace paddle
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/lite/opencl/cl2_header.h"
namespace paddle {
namespace lite {
// Returns the symbolic name of an OpenCL status code; "UNKNOWN ERROR CODE"
// for values not covered.
const char* opencl_error_to_str(cl_int error);

// Logs the symbolic error name plus file/line when ERR != CL_SUCCESS.
// FIX: wrapped in do { } while (0) so the macro acts as one statement and
// cannot capture a following `else` when used in an un-braced if; ERR is
// parenthesized against operator-precedence surprises.
// NOTE(review): despite the message text, this does not actually exit.
#define CL_CHECK_ERRORS(ERR)                                           \
  do {                                                                 \
    if ((ERR) != CL_SUCCESS) {                                         \
      printf(                                                          \
          "OpenCL error with code %s happened in file %s at line %d. " \
          "Exiting.\n",                                                \
          opencl_error_to_str(ERR), __FILE__, __LINE__);               \
    }                                                                  \
  } while (0)
} // namespace lite
} // namespace paddle
此差异已折叠。
......@@ -25,6 +25,23 @@ function cmake_x86 {
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
}
function cmake_opencl {
    # Configure an OpenCL-enabled ARM build of the lite framework.
    # $1: ARM_TARGET_OS in "android" , "armlinux"
    # $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf"
    # FIX: quote the positional parameters so empty or space-containing
    # values do not drop or split the -D arguments.
    cmake .. \
        -DLITE_WITH_OPENCL=ON \
        -DWITH_GPU=OFF \
        -DWITH_MKL=OFF \
        -DWITH_LITE=ON \
        -DLITE_WITH_CUDA=OFF \
        -DLITE_WITH_X86=OFF \
        -DLITE_WITH_ARM=ON \
        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
        -DWITH_TESTING=ON \
        -DARM_TARGET_OS="$1" -DARM_TARGET_ARCH_ABI="$2"
}
# This method is only called in CI.
function cmake_x86_for_CI {
prepare_for_codegen # fake an empty __generated_code__.cc to pass cmake.
......@@ -422,6 +439,10 @@ function main {
cmake_x86
shift
;;
cmake_opencl)
cmake_opencl $ARM_OS $ARM_ABI
shift
;;
cmake_cuda)
cmake_cuda
shift
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册