Commit 44b09240, authored by ZhenWang

Add pool2d OpenCL support.

Parent b067ce50
@@ -64,4 +64,5 @@ USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
#ifdef LITE_WITH_OPENCL
USE_LITE_KERNEL(elementwise_add, kOpenCL, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kOpenCL, kFloat, kNCHW, def);
#endif
@@ -31,8 +31,6 @@ class FeedCompute
    VLOG(4) << "col " << param.col;
    const lite::Tensor &feed_item = (*param.feed_list)[0];
    param.out->ShareDataWith(feed_item);
-   VLOG(4) << "FEED input " << feed_item << " col " << param.col;
-   VLOG(4) << "FEED output " << *param.out;
  }
};
...
@@ -5,12 +5,19 @@ endif()
set(cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper)
cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})
+cc_library(pool_opencl SRCS pool_compute.cc DEPS ${cl_kernel_deps})
lite_cc_test(test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl
    op_registry_lite program_lite
    context_lite
    )
+lite_cc_test(test_pool_opencl SRCS pool_compute_test.cc DEPS pool_opencl
+    op_registry_lite program_lite
+    context_lite
+    )
set(opencl_kernels
    elementwise_add_opencl
-   CACHE INTERNAL "")
+   pool_opencl
+   CACHE INTERNAL "opencl_kernels")
@@ -40,23 +40,23 @@ TEST(elementwise_add, init) {
  kernel->SetParam(param);
  kernel->SetContext(std::move(context));

-  X.Resize({1, 1, 1, 10});
-  Y.Resize({1, 1, 1, 10});
-  Out.Resize({1, 1, 1, 10});
+  X.Resize({4, 3, 10, 10});
+  Y.Resize({4, 3, 10, 10});
+  Out.Resize({4, 3, 10, 10});
  auto* x_data = X.mutable_data<float>();
  auto* y_data = Y.mutable_data<float>();
  auto* out_data = Out.mutable_data<float>();

-  for (int i = 0; i < 10; i++) {
+  for (int i = 0; i < 4 * 3 * 10 * 10; i++) {
    x_data[i] = 1.1 * i;
    y_data[i] = 2.3 * i;
  }

  kernel->Launch();

-  for (int i = 0; i < 10; i++) {
-    EXPECT_NEAR(out_data[i], 3.4 * i, 1e-6);
+  for (int i = 0; i < 4 * 3 * 10 * 10; i++) {
+    EXPECT_NEAR(out_data[i], static_cast<float>(3.4 * i), 1e-6);
  }
}
...
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <vector>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/operators/op_params.h"
// NOTE ugly here, hide these.
#include "paddle/fluid/lite/opencl/cl_caller.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace opencl {
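// OpenCL pool2d kernel for float NCHW tensors. It dispatches to the
// pool_<type> kernel in pool_kernel.cl (e.g. pool_avg, pool_max) via the
// cl_caller pool() helper.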
class PoolCompute
: public KernelLite<TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)> {
public:
using param_t = operators::PoolParam;
void Run() override {
auto& param = *param_.get_mutable<param_t>();
auto& in_dims = param.x->dims();
auto& out_dims = param.output->dims();
const std::string pooling_type = param.pooling_type;
bool global_pooling = param.global_pooling;
std::vector<int>& paddings = param.paddings;
std::vector<int>& strides = param.strides;
std::vector<int>& ksize = param.ksize;
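// For global pooling, ignore the configured window: zero the paddings and
// use the full input spatial size (H, W) as the kernel size.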
if (global_pooling) {
for (size_t i = 0; i < ksize.size(); ++i) {
paddings[i] = 0;
ksize[i] = static_cast<int>(in_dims[i + 2]);
}
}
auto& context = ctx_->As<OpenClContext>();
CHECK(context.cl_helper() != nullptr);
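// Hand off to the OpenCL pooling routine in cl_caller, which uploads the
// input tensor to a CL image, launches the pool kernel, and copies the
// result back into the output tensor.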
pool(context.cl_helper(), pooling_type, paddings[0], paddings[1],
strides[0], strides[1], ksize[0], ksize[1],
static_cast<const float*>(param.x->raw_data()), in_dims,
param.output->mutable_data<float>(), out_dims);
}
};
} // namespace opencl
} // namespace kernels
} // namespace lite
} // namespace paddle
REGISTER_LITE_KERNEL(pool2d, kOpenCL, kFloat, kNCHW,
paddle::lite::kernels::opencl::PoolCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <random>
#include "paddle/fluid/lite/core/compatible_tensor.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
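// Naive CPU implementation of average pooling, used as the reference for
// checking the OpenCL kernel output.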
void pool_avg(const int padding_height, const int padding_width,
const int stride_height, const int stride_width,
const int ksize_height, const int ksize_width,
const float* input_data, const DDim& in_dim, float* output_data,
const DDim& out_dim) {
const int batch_size = in_dim[0];
const int input_height = in_dim[2];
const int input_width = in_dim[3];
const int output_channels = out_dim[1];
const int output_height = out_dim[2];
const int output_width = out_dim[3];
const size_t input_spatial_size = input_height * input_width;
const size_t output_spatial_size = output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
int channel = i * output_channels + c;
const float* input_ptr = input_data + channel * input_spatial_size;
float* output_ptr = output_data + channel * output_spatial_size;
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
float val = 0.f;
int count = 0;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
val += input_ptr[h * input_width + w];
++count;
}
}
output_ptr[ph * output_width + pw] =
(count > 0) ? val * (1.f / count) : 0.f;
}
}
}
}
}
TEST(pool2d, init) {
LOG(INFO) << "to get kernel ...";
auto kernels = KernelRegistry::Global().Create(
"pool2d", TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW));
ASSERT_FALSE(kernels.empty());
auto kernel = std::move(kernels.front());
LOG(INFO) << "get kernel";
lite::Tensor x, out;
operators::PoolParam param;
param.x = &x;
param.output = &out;
param.global_pooling = true;
param.pooling_type = "avg";
param.paddings = std::vector<int>{0, 0};
param.strides = std::vector<int>{1, 1};
param.ksize = std::vector<int>{7, 7};
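// global_pooling is enabled, so PoolCompute::Run() will override the
// paddings/ksize above to cover the full 7x7 input.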
std::unique_ptr<KernelContext> context(new KernelContext);
context->As<OpenClContext>().InitOnce();
kernel->SetParam(param);
kernel->SetContext(std::move(context));
const DDim in_dim = DDim(std::vector<DDim::value_type>{4, 1024, 7, 7});
const DDim out_dim = DDim(std::vector<DDim::value_type>{4, 1024, 1, 1});
x.Resize(in_dim);
out.Resize(out_dim);
auto* x_data = x.mutable_data<float>();
auto* out_data = out.mutable_data<float>();
std::default_random_engine engine;
std::uniform_real_distribution<float> dist(-5, 5);
for (int i = 0; i < 4 * 1024 * 7 * 7; i++) {
x_data[i] = dist(engine);
}
kernel->Launch();
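// Compute the CPU reference result and compare it element-wise with the
// OpenCL output.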
std::unique_ptr<float[]> out_ref(new float[4 * 1024 * 1 * 1]);
pool_avg(0, 0, 1, 1, 7, 7, x_data, in_dim, out_ref.get(), out_dim);
for (int i = 0; i < 4 * 1024 * 1 * 1; i++) {
EXPECT_NEAR(out_data[i], out_ref[i], 1e-6);
}
}
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(pool2d, kOpenCL, kFloat, kNCHW, def);
@@ -2,8 +2,8 @@ if (NOT LITE_WITH_OPENCL)
  return()
endif()

-cc_library(cl_wrapper SRCS cl_wrapper.cc)
-cc_library(cl_tool SRCS cl_tool.cc)
+cc_library(cl_wrapper SRCS cl_wrapper.cxx)
+cc_library(cl_tool SRCS cl_tool.cc DEPS cl_wrapper)
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
...
@@ -19,6 +19,7 @@ limitations under the License. */
#include "paddle/fluid/lite/opencl/cl_helper.h"
#include "paddle/fluid/lite/opencl/cl_image.h"
#include "paddle/fluid/lite/opencl/cl_tool.h"
+#include "paddle/fluid/lite/utils/string.h"

namespace paddle {
namespace lite {

@@ -94,5 +95,62 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
  CopyImageData(helper, out_image, out);
}

void pool(CLHelper* helper, const std::string pooling_type, const int pad_h,
const int pad_w, const int stride_h, const int stride_w,
const int ksize_h, const int ksize_w, const float* in,
const DDim& in_dim, float* out, const DDim& out_dim) {
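// The kernel name is derived from the pooling type, e.g. "pool_avg" or
// "pool_max"; the corresponding kernel must already be registered with the
// helper (see AddKernel in the tests).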
auto kernel =
helper->GetKernel(string_format("pool_%s", pooling_type.c_str()));
CLImage in_image;
in_image.set_tensor_data(in, in_dim);
in_image.InitNormalCLImage(helper->OpenCLContext());
VLOG(3) << " --- Inpu image: " << in_image << " --- ";
CLImage out_image;
out_image.InitEmptyImage(helper->OpenCLContext(), out_dim);
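// The NDRange for the launch is derived from the output image dimensions.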
auto global_work_size = helper->DefaultWorkSize(out_image);
auto* in_converter =
dynamic_cast<CLImageConverterNormal*>(in_image.image_converter());
auto* out_converter =
dynamic_cast<CLImageConverterNormal*>(out_image.image_converter());
const int in_height = in_converter->HeightOfOneBlock();
const int in_width = in_converter->WidthOfOneBlock();
const int out_height = out_converter->HeightOfOneBlock();
const int out_width = out_converter->WidthOfOneBlock();
cl_int status;
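// Pass the per-block geometry, paddings, strides, and kernel size, followed
// by the input and output images, as kernel arguments.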
status = kernel.setArg(0, in_height);
CL_CHECK_ERRORS(status);
status = kernel.setArg(1, in_width);
CL_CHECK_ERRORS(status);
status = kernel.setArg(2, out_height);
CL_CHECK_ERRORS(status);
status = kernel.setArg(3, out_width);
CL_CHECK_ERRORS(status);
status = kernel.setArg(4, pad_h);
CL_CHECK_ERRORS(status);
status = kernel.setArg(5, pad_w);
CL_CHECK_ERRORS(status);
status = kernel.setArg(6, stride_h);
CL_CHECK_ERRORS(status);
status = kernel.setArg(7, stride_w);
CL_CHECK_ERRORS(status);
status = kernel.setArg(8, ksize_h);
CL_CHECK_ERRORS(status);
status = kernel.setArg(9, ksize_w);
CL_CHECK_ERRORS(status);
status = kernel.setArg(10, *in_image.cl_image());
CL_CHECK_ERRORS(status);
status = kernel.setArg(11, *out_image.cl_image());
CL_CHECK_ERRORS(status);
status = helper->OpenCLCommandQueue().enqueueNDRangeKernel(
kernel, cl::NullRange, global_work_size, cl::NullRange, nullptr, nullptr);
CL_CHECK_ERRORS(status);
status = helper->OpenCLCommandQueue().finish();
CL_CHECK_ERRORS(status);
VLOG(3) << " --- Out image: " << out_image << " --- ";
CopyImageData(helper, out_image, out);
}
} // namespace lite
} // namespace paddle
@@ -31,5 +31,10 @@ void elementwise_add(CLHelper* helper, const float* in, const DDim& in_dim,
                     const float* bias, const DDim& bias_dim, float* out,
                     const DDim& out_dim);

+void pool(CLHelper* helper, const std::string pooling_type, const int pad_h,
+          const int pad_w, const int stride_h, const int stride_w,
+          const int ksize_h, const int ksize_w, const float* in,
+          const DDim& in_dim, float* out, const DDim& out_dim);
+
} // namespace lite
} // namespace paddle
@@ -33,18 +33,19 @@ class CLImageConverterBase {
class CLImageConverterDefault : public CLImageConverterBase {
 public:
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *nchw, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *nchw, float *image, const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;
};

class CLImageConverterFolder : public CLImageConverterBase {
 public:
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *tensor, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *tensor, float *image,
+                   const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;

  /*
   * width of original tensor

@@ -66,10 +67,11 @@ class CLImageConverterFolder : public CLImageConverterBase {
class CLImageConverterNormal : public CLImageConverterBase {
 public:
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *tensor, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *tensor, float *image,
+                   const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;

  /*
   * width of original tensor

@@ -90,24 +92,27 @@ class CLImageConverterNormal : public CLImageConverterBase {
};

class CLImageConverterNWBlock : public CLImageConverterBase {
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *tensor, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *tensor, float *image,
+                   const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;
};

class CLImageConverterDWBlock : public CLImageConverterBase {
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *tensor, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *tensor, float *image,
+                   const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;
};

class CLImageConverterWinoTransWeight : public CLImageConverterBase {
 public:
-  DDim InitImageDimInfoWith(const DDim &tensor_dim);
-  void NCHWToImage(float *tensor, float *image, const DDim &tensor_dim);
-  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
-                   const DDim &tensor_dim);
+  DDim InitImageDimInfoWith(const DDim &tensor_dim) override;
+  void NCHWToImage(float *tensor, float *image,
+                   const DDim &tensor_dim) override;
+  void ImageToNCHW(float *image, float *tensor, const DDim &image_dim,
+                   const DDim &tensor_dim) override;
};

} // namespace lite
...
@@ -160,12 +160,11 @@ TEST(cl_test, channel_add_test) {
  for (int i = 0; i < 4 * 16 * 256 * 512; i += stride) {
    std::cout << out[i] << " ";
  }
+  std::cout << std::endl;

  for (int i = 0; i < 4 * 16 * 256 * 512; i++) {
    EXPECT_NEAR(out[i], out_ref[i], 1e-6);
  }
-  std::cout << std::endl;
}

TEST(cl_test, elementwise_add_test) {
@@ -205,12 +204,86 @@ TEST(cl_test, elementwise_add_test) {
  for (int i = 0; i < 4 * 16 * 256 * 512; i += stride) {
    std::cout << out[i] << " ";
  }
+  std::cout << std::endl;

  for (int i = 0; i < 4 * 16 * 256 * 512; i++) {
    EXPECT_NEAR(out[i], out_ref[i], 1e-6);
  }
-  std::cout << std::endl;
}

void pool_avg(const int padding_height, const int padding_width,
const int stride_height, const int stride_width,
const int ksize_height, const int ksize_width,
const float* input_data, const DDim& in_dim, float* output_data,
const DDim& out_dim) {
const int batch_size = in_dim[0];
const int input_height = in_dim[2];
const int input_width = in_dim[3];
const int output_channels = out_dim[1];
const int output_height = out_dim[2];
const int output_width = out_dim[3];
const size_t input_spatial_size = input_height * input_width;
const size_t output_spatial_size = output_height * output_width;
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
int channel = i * output_channels + c;
const float* input_ptr = input_data + channel * input_spatial_size;
float* output_ptr = output_data + channel * output_spatial_size;
for (int ph = 0; ph < output_height; ++ph) {
int hstart = ph * stride_height - padding_height;
int hend = std::min(hstart + ksize_height, input_height);
hstart = std::max(hstart, 0);
for (int pw = 0; pw < output_width; ++pw) {
int wstart = pw * stride_width - padding_width;
int wend = std::min(wstart + ksize_width, input_width);
wstart = std::max(wstart, 0);
float val = 0.f;
int count = 0;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
val += input_ptr[h * input_width + w];
++count;
}
}
output_ptr[ph * output_width + pw] =
(count > 0) ? val * (1.f / count) : 0.f;
}
}
}
}
}
TEST(cl_test, pool_test) {
std::default_random_engine engine;
std::uniform_real_distribution<float> dist(-5, 5);
const DDim in_dim = DDim(std::vector<DDim::value_type>{4, 1024, 7, 7});
std::unique_ptr<float[]> in_data(new float[4 * 1024 * 7 * 7]);
for (int i = 0; i < 4 * 1024 * 7 * 7; i++) {
in_data[i] = dist(engine);
}
const DDim out_dim = DDim(std::vector<DDim::value_type>{4, 1024, 1, 1});
std::unique_ptr<float[]> out(new float[4 * 1024 * 1 * 1]);
std::unique_ptr<float[]> out_ref(new float[4 * 1024 * 1 * 1]);
bool status = InitOpenCLEngine(FLAGS_cl_path);
CHECK(status) << "Fail to initialize OpenCL engine.";
std::unique_ptr<CLContext> context(new CLContext);
std::unique_ptr<CLHelper> helper(new CLHelper(context.get()));
helper->AddKernel("pool_max", "pool_kernel.cl");
helper->AddKernel("pool_avg", "pool_kernel.cl");
pool(helper.get(), "avg", 0, 0, 1, 1, 7, 7, in_data.get(), in_dim, out.get(),
out_dim);
pool_avg(0, 0, 1, 1, 7, 7, in_data.get(), in_dim, out_ref.get(), out_dim);
for (int i = 0; i < 4 * 1024 * 1 * 1; i++) {
EXPECT_NEAR(out[i], out_ref[i], 1e-6);
}
}

} // namespace lite
...
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// This file is borrowed from MACE, and we will refactor it
+// in the near future.

#include <dlfcn.h>
#include <glog/logging.h>
@@ -157,58 +159,58 @@ class OpenCLLibrary final {
  using clGetImageInfoFunc = cl_int (*)(cl_mem, cl_image_info, size_t, void *,
                                        size_t *);

-#define PADDLE_CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr
+#define CL_DEFINE_FUNC_PTR(func) func##Func func = nullptr

  CL_DEFINE_FUNC_PTR(clGetPlatformIDs);
  CL_DEFINE_FUNC_PTR(clGetPlatformInfo);
  CL_DEFINE_FUNC_PTR(clBuildProgram);
  CL_DEFINE_FUNC_PTR(clEnqueueNDRangeKernel);
  CL_DEFINE_FUNC_PTR(clSetKernelArg);
  CL_DEFINE_FUNC_PTR(clReleaseKernel);
  CL_DEFINE_FUNC_PTR(clCreateProgramWithSource);
  CL_DEFINE_FUNC_PTR(clCreateBuffer);
  CL_DEFINE_FUNC_PTR(clCreateImage);
  CL_DEFINE_FUNC_PTR(clCreateImage2D);
  CL_DEFINE_FUNC_PTR(clCreateUserEvent);
  CL_DEFINE_FUNC_PTR(clRetainKernel);
  CL_DEFINE_FUNC_PTR(clCreateKernel);
  CL_DEFINE_FUNC_PTR(clGetProgramInfo);
  CL_DEFINE_FUNC_PTR(clFlush);
  CL_DEFINE_FUNC_PTR(clFinish);
  CL_DEFINE_FUNC_PTR(clReleaseProgram);
  CL_DEFINE_FUNC_PTR(clRetainContext);
  CL_DEFINE_FUNC_PTR(clGetContextInfo);
  CL_DEFINE_FUNC_PTR(clCreateProgramWithBinary);
  CL_DEFINE_FUNC_PTR(clCreateCommandQueue);
  CL_DEFINE_FUNC_PTR(clCreateCommandQueueWithProperties);
  CL_DEFINE_FUNC_PTR(clReleaseCommandQueue);
  CL_DEFINE_FUNC_PTR(clEnqueueMapBuffer);
  CL_DEFINE_FUNC_PTR(clEnqueueMapImage);
  CL_DEFINE_FUNC_PTR(clRetainProgram);
  CL_DEFINE_FUNC_PTR(clGetProgramBuildInfo);
  CL_DEFINE_FUNC_PTR(clEnqueueReadBuffer);
  CL_DEFINE_FUNC_PTR(clEnqueueReadImage);
  CL_DEFINE_FUNC_PTR(clEnqueueWriteBuffer);
  CL_DEFINE_FUNC_PTR(clWaitForEvents);
  CL_DEFINE_FUNC_PTR(clReleaseEvent);
  CL_DEFINE_FUNC_PTR(clCreateContext);
  CL_DEFINE_FUNC_PTR(clCreateContextFromType);
  CL_DEFINE_FUNC_PTR(clReleaseContext);
  CL_DEFINE_FUNC_PTR(clRetainCommandQueue);
  CL_DEFINE_FUNC_PTR(clEnqueueUnmapMemObject);
  CL_DEFINE_FUNC_PTR(clRetainMemObject);
  CL_DEFINE_FUNC_PTR(clReleaseMemObject);
  CL_DEFINE_FUNC_PTR(clGetDeviceInfo);
  CL_DEFINE_FUNC_PTR(clGetDeviceIDs);
  CL_DEFINE_FUNC_PTR(clRetainDevice);
  CL_DEFINE_FUNC_PTR(clReleaseDevice);
  CL_DEFINE_FUNC_PTR(clRetainEvent);
  CL_DEFINE_FUNC_PTR(clGetKernelWorkGroupInfo);
  CL_DEFINE_FUNC_PTR(clGetEventInfo);
  CL_DEFINE_FUNC_PTR(clGetEventProfilingInfo);
  CL_DEFINE_FUNC_PTR(clGetImageInfo);

-#undef PADDLE_CL_DEFINE_FUNC_PTR
+#undef CL_DEFINE_FUNC_PTR
 private:
  void *handle_ = nullptr;

@@ -285,7 +287,7 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
    return nullptr;
  }

-#define PADDLE_CL_ASSIGN_FROM_DLSYM(func) \
+#define CL_ASSIGN_FROM_DLSYM(func) \
  do { \
    void *ptr = dlsym(handle, #func); \
    if (ptr == nullptr) { \

@@ -296,56 +298,56 @@ void *OpenCLLibrary::LoadFromPath(const std::string &path) {
    VLOG(3) << "Loaded " << #func << " from " << path; \
  } while (false)
  CL_ASSIGN_FROM_DLSYM(clGetPlatformIDs);
  CL_ASSIGN_FROM_DLSYM(clGetPlatformInfo);
  CL_ASSIGN_FROM_DLSYM(clBuildProgram);
  CL_ASSIGN_FROM_DLSYM(clEnqueueNDRangeKernel);
  CL_ASSIGN_FROM_DLSYM(clSetKernelArg);
  CL_ASSIGN_FROM_DLSYM(clReleaseKernel);
  CL_ASSIGN_FROM_DLSYM(clCreateProgramWithSource);
  CL_ASSIGN_FROM_DLSYM(clCreateBuffer);
  CL_ASSIGN_FROM_DLSYM(clCreateImage);
  CL_ASSIGN_FROM_DLSYM(clCreateImage2D);
  CL_ASSIGN_FROM_DLSYM(clCreateUserEvent);
  CL_ASSIGN_FROM_DLSYM(clRetainKernel);
  CL_ASSIGN_FROM_DLSYM(clCreateKernel);
  CL_ASSIGN_FROM_DLSYM(clGetProgramInfo);
  CL_ASSIGN_FROM_DLSYM(clFlush);
  CL_ASSIGN_FROM_DLSYM(clFinish);
  CL_ASSIGN_FROM_DLSYM(clReleaseProgram);
  CL_ASSIGN_FROM_DLSYM(clRetainContext);
  CL_ASSIGN_FROM_DLSYM(clGetContextInfo);
  CL_ASSIGN_FROM_DLSYM(clCreateProgramWithBinary);
  CL_ASSIGN_FROM_DLSYM(clCreateCommandQueue);
  CL_ASSIGN_FROM_DLSYM(clCreateCommandQueueWithProperties);
  CL_ASSIGN_FROM_DLSYM(clReleaseCommandQueue);
  CL_ASSIGN_FROM_DLSYM(clEnqueueMapBuffer);
  CL_ASSIGN_FROM_DLSYM(clEnqueueMapImage);
  CL_ASSIGN_FROM_DLSYM(clRetainProgram);
  CL_ASSIGN_FROM_DLSYM(clGetProgramBuildInfo);
  CL_ASSIGN_FROM_DLSYM(clEnqueueReadBuffer);
  CL_ASSIGN_FROM_DLSYM(clEnqueueReadImage);
  CL_ASSIGN_FROM_DLSYM(clEnqueueWriteBuffer);
  CL_ASSIGN_FROM_DLSYM(clWaitForEvents);
  CL_ASSIGN_FROM_DLSYM(clReleaseEvent);
  CL_ASSIGN_FROM_DLSYM(clCreateContext);
  CL_ASSIGN_FROM_DLSYM(clCreateContextFromType);
  CL_ASSIGN_FROM_DLSYM(clReleaseContext);
  CL_ASSIGN_FROM_DLSYM(clRetainCommandQueue);
  CL_ASSIGN_FROM_DLSYM(clEnqueueUnmapMemObject);
  CL_ASSIGN_FROM_DLSYM(clRetainMemObject);
  CL_ASSIGN_FROM_DLSYM(clReleaseMemObject);
  CL_ASSIGN_FROM_DLSYM(clGetDeviceInfo);
  CL_ASSIGN_FROM_DLSYM(clGetDeviceIDs);
  CL_ASSIGN_FROM_DLSYM(clRetainDevice);
  CL_ASSIGN_FROM_DLSYM(clReleaseDevice);
  CL_ASSIGN_FROM_DLSYM(clRetainEvent);
  CL_ASSIGN_FROM_DLSYM(clGetKernelWorkGroupInfo);
  CL_ASSIGN_FROM_DLSYM(clGetEventInfo);
  CL_ASSIGN_FROM_DLSYM(clGetEventProfilingInfo);
  CL_ASSIGN_FROM_DLSYM(clGetImageInfo);

-#undef PADDLE_CL_ASSIGN_FROM_DLSYM
+#undef CL_ASSIGN_FROM_DLSYM
  return handle;
}
...