未验证 提交 b92e9f9a 编写于 作者: Y Yuan Shuai 提交者: GitHub

Replace uint16_t with half_t. test=develop (#2996)

上级 e68b36f5
......@@ -131,8 +131,8 @@ lite_cc_test(test_mul_buffer_opencl SRCS mul_buffer_compute_test.cc
ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
#lite_cc_test(test_elementwise_add_buffer_opencl SRCS elementwise_add__buffer_compute_test.cc
# DEPS elementwise_add_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
# DEPS elementwise_add_opencl op_registry program context
# ARGS --cl_path=${CMAKE_SOURCE_DIR}/lite/backends/opencl)
lite_cc_test(test_io_copy_buffer_opencl SRCS io_copy_buffer_compute_test.cc
DEPS io_copy_opencl op_registry program context
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -43,9 +44,9 @@ class ReluComputeImageDefault : public KernelLite<TARGET(kOpenCL),
void Run() override {
auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.X->dims();
auto* x_buf = param.X->data<uint16_t, cl::Image2D>();
auto* x_buf = param.X->data<half_t, cl::Image2D>();
auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf = param.Out->mutable_data<uint16_t, cl::Image2D>(
auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
const auto& y_dims = param.Out->dims(); // useless: check dim only
......@@ -111,9 +112,9 @@ class Relu6ComputeImageDefault : public KernelLite<TARGET(kOpenCL),
void Run() override {
auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.X->dims();
auto* x_buf = param.X->data<uint16_t, cl::Image2D>();
auto* x_buf = param.X->data<half_t, cl::Image2D>();
auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf = param.Out->mutable_data<uint16_t, cl::Image2D>(
auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
const auto& y_dims = param.Out->dims(); // useless: check dim only
auto threshold = param.Relu_clipped_coef;
......@@ -185,14 +186,13 @@ class SigmoidComputeImageDefault
auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.X->dims();
auto* x_buf =
param.X->data<uint16_t,
cl::Image2D>(); // use uint16_t represents half float
param.X->data<half_t,
cl::Image2D>(); // use half_t represents half float
auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf =
param.Out->mutable_data<uint16_t, cl::Image2D>( // use uint16_t
// represents half float
image_shape["width"],
image_shape["height"]);
auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>( // use half_t
// represents half float
image_shape["width"],
image_shape["height"]);
const auto& y_dims = param.Out->dims(); // useless: check dim only
auto& context = ctx_->As<OpenCLContext>();
......
......@@ -133,9 +133,9 @@ TEST(relu_image2d_fp16, compute) {
mapped_x[i] = static_cast<int>(i) - x_dim.production() / 2;
mapped_y[i] = static_cast<int>(0);
}
auto *relu_in_data = relu_in.mutable_data<uint16_t, cl::Image2D>(
auto *relu_in_data = relu_in.mutable_data<half_t, cl::Image2D>(
relu_image2d_shape["width"], relu_image2d_shape["height"]);
auto *relu_out_data = relu_out.mutable_data<uint16_t, cl::Image2D>(
auto *relu_out_data = relu_out.mutable_data<half_t, cl::Image2D>(
relu_image2d_shape["width"], relu_image2d_shape["height"]);
// set context and kernel args
......@@ -290,9 +290,9 @@ TEST(relu6_image2d_fp16, compute) {
mapped_x[i] = static_cast<int>(i) - x_dim.production() / 2;
mapped_y[i] = static_cast<int>(0);
}
auto *relu_in_data = relu_in.mutable_data<uint16_t, cl::Image2D>(
auto *relu_in_data = relu_in.mutable_data<half_t, cl::Image2D>(
relu_image2d_shape["width"], relu_image2d_shape["height"]);
auto *relu_out_data = relu_out.mutable_data<uint16_t, cl::Image2D>(
auto *relu_out_data = relu_out.mutable_data<half_t, cl::Image2D>(
relu_image2d_shape["width"], relu_image2d_shape["height"]);
// set context and kernel args
......@@ -447,12 +447,10 @@ TEST(sigmoid_image2d_fp16, compute) {
for (int i = 0; i < x_dim.production(); ++i) {
mapped_x[i] = static_cast<float>(dist(engine));
}
auto *sigmoid_in_data =
sigmoid_in.mutable_data<uint16_t, cl::Image2D>(
sigmoid_image2d_shape["width"],
sigmoid_image2d_shape["height"]);
auto *sigmoid_in_data = sigmoid_in.mutable_data<half_t, cl::Image2D>(
sigmoid_image2d_shape["width"], sigmoid_image2d_shape["height"]);
auto *sigmoid_out_data =
sigmoid_out.mutable_data<uint16_t, cl::Image2D>(
sigmoid_out.mutable_data<half_t, cl::Image2D>(
sigmoid_image2d_shape["width"],
sigmoid_image2d_shape["height"]);
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -80,7 +81,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.output->dims();
auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_buf = param.output->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
const auto& y_dims = param.output->dims(); // useless: check dim only
......@@ -124,8 +125,8 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
printf("this axis: %d does not support \n", axis_);
}
if (inputs.size() == 2) {
auto* x_buf0 = inputs[0]->data<uint16_t, cl::Image2D>();
auto* x_buf1 = inputs[1]->data<uint16_t, cl::Image2D>();
auto* x_buf0 = inputs[0]->data<half_t, cl::Image2D>();
auto* x_buf1 = inputs[1]->data<half_t, cl::Image2D>();
cl_int status = kernel.setArg(arg_idx, *x_buf0);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *x_buf1);
......@@ -152,7 +153,7 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
auto start = 0;
for (int i = 0; i < inputs.size(); i++) {
arg_idx = 0;
auto* x_buf = inputs[i]->data<uint16_t, cl::Image2D>();
auto* x_buf = inputs[i]->data<half_t, cl::Image2D>();
cl_int status = kernel.setArg(arg_idx, *x_buf);
CL_CHECK_FATAL(status);
status = kernel.setArg(++arg_idx, *out_buf);
......
......@@ -192,15 +192,15 @@ TEST(concat_image2d, compute) {
mapped_y[i] = static_cast<int>(0);
}
auto *concat_in_data0 =
concat_in0.mutable_data<uint16_t, cl::Image2D>(
concat_in0.mutable_data<half_t, cl::Image2D>(
concat_image2d_shape_in0["width"],
concat_image2d_shape_in0["height"]);
auto *concat_in_data1 =
concat_in1.mutable_data<uint16_t, cl::Image2D>(
concat_in1.mutable_data<half_t, cl::Image2D>(
concat_image2d_shape_in1["width"],
concat_image2d_shape_in1["height"]);
auto *concat_out_data =
concat_out.mutable_data<uint16_t, cl::Image2D>(
concat_out.mutable_data<half_t, cl::Image2D>(
concat_image2d_shape["width"],
concat_image2d_shape["height"]);
......
......@@ -85,10 +85,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d1x1;
......@@ -109,10 +109,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
} else if (filter_dims[1] == 1 && x_dims[1] == output_dims[1] &&
kernel_h != 3) {
......@@ -123,10 +123,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterNWBlock converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::DepthwiseConv2d;
......@@ -137,10 +137,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d3x3;
......@@ -151,10 +151,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d5x5;
......@@ -165,10 +165,10 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder converter;
const DDim& filter_image_dims = converter.InitImageDimInfoWith(filter_dims);
std::vector<uint16_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
std::vector<half_t> filter_image_v(filter_image_dims[0] *
filter_image_dims[1] * 4); // 4 : RGBA
converter.NCHWToImage(filter_cpu, filter_image_v.data(), filter_dims);
this->filter_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
this->filter_gpu_image_.mutable_data<half_t, cl::Image2D>(
filter_image_dims[0], filter_image_dims[1], filter_image_v.data());
impl_ = &ConvImageCompute::Conv2d7x7;
......@@ -200,12 +200,12 @@ void ConvImageCompute::PrepareForRun() {
CLImageConverterFolder bias_converter;
const DDim& bias_image_dims =
bias_converter.InitImageDimInfoWith(param.bias->dims());
std::vector<uint16_t> bias_image_v(bias_image_dims[0] * bias_image_dims[1] *
4);
std::vector<half_t> bias_image_v(bias_image_dims[0] * bias_image_dims[1] *
4);
float* bias_cpu_data = param.bias->mutable_data<float>();
bias_converter.NCHWToImage(
bias_cpu_data, bias_image_v.data(), param.bias->dims());
this->bias_gpu_image_.mutable_data<uint16_t, cl::Image2D>(
this->bias_gpu_image_.mutable_data<half_t, cl::Image2D>(
bias_image_dims[0], bias_image_dims[1], bias_image_v.data());
// convert cpu buffer bias --> gpu image --- end ----
}
......@@ -223,8 +223,8 @@ void ConvImageCompute::Conv2d1x1() {
auto input_dims = param.x->dims();
auto paddings = *param.paddings;
auto strides = param.strides;
auto* input_image = param.x->data<uint16_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims();
......@@ -233,7 +233,7 @@ void ConvImageCompute::Conv2d1x1() {
int output_width = output_dims[3];
int output_height = output_dims[2];
auto out_image_shape = InitImageDimInfoWith(output_dims);
auto* out_image = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
const bool has_bias = param.bias != nullptr;
......@@ -292,7 +292,7 @@ void ConvImageCompute::Conv2d1x1() {
const cl::Buffer* bias_buf = nullptr;
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto& context = ctx_->As<OpenCLContext>();
......@@ -373,8 +373,8 @@ void ConvImageCompute::Conv2d3x3() {
auto paddings = *param.paddings;
auto strides = param.strides;
auto* input_image = param.x->data<uint16_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims();
......@@ -388,7 +388,7 @@ void ConvImageCompute::Conv2d3x3() {
int filter_height = filter_dims[2];
int filter_channel = filter_dims[1];
auto out_image_shape = InitImageDimInfoWith(output_dims);
auto* out_image = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
const bool has_bias = param.bias != nullptr;
......@@ -464,7 +464,7 @@ void ConvImageCompute::Conv2d3x3() {
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto& context = ctx_->As<OpenCLContext>();
......@@ -549,8 +549,8 @@ void ConvImageCompute::Conv2d5x5() {
auto input_dims = param.x->dims();
auto paddings = *param.paddings;
auto strides = param.strides;
auto* input_image = param.x->data<uint16_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims();
......@@ -561,7 +561,7 @@ void ConvImageCompute::Conv2d5x5() {
int filter_width = filter_dims[3];
int filter_height = filter_dims[2];
auto out_image_shape = InitImageDimInfoWith(output_dims);
auto* out_image = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
const bool has_bias = param.bias != nullptr;
......@@ -619,7 +619,7 @@ void ConvImageCompute::Conv2d5x5() {
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto& context = ctx_->As<OpenCLContext>();
......@@ -694,8 +694,8 @@ void ConvImageCompute::Conv2d7x7() {
auto input_dims = param.x->dims();
auto paddings = *param.paddings;
auto strides = param.strides;
auto* input_image = param.x->data<uint16_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims();
......@@ -706,7 +706,7 @@ void ConvImageCompute::Conv2d7x7() {
int filter_width = filter_dims[3];
int filter_height = filter_dims[2];
auto out_image_shape = InitImageDimInfoWith(output_dims);
auto* out_image = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
const bool has_bias = param.bias != nullptr;
......@@ -764,7 +764,7 @@ void ConvImageCompute::Conv2d7x7() {
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto& context = ctx_->As<OpenCLContext>();
......@@ -845,17 +845,17 @@ void ConvImageCompute::DepthwiseConv2d3x3s1() {
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
auto* input_img = param.x->data<uint16_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_img = param.x->data<half_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<half_t, cl::Image2D>();
const cl::Image2D* bias_img = nullptr;
if (param.bias) {
bias_img = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_img = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto image_shape = InitImageDimInfoWith(output_dims);
auto* output_img = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* output_img = param.output->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
STL::stringstream kernel_key;
......@@ -926,17 +926,17 @@ void ConvImageCompute::DepthwiseConv2d3x3() {
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
auto* input_img = param.x->data<uint16_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_img = param.x->data<half_t, cl::Image2D>();
auto* filter_img = filter_gpu_image_.data<half_t, cl::Image2D>();
const cl::Image2D* bias_img = nullptr;
if (param.bias) {
bias_img = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_img = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto image_shape = InitImageDimInfoWith(output_dims);
auto* output_img = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* output_img = param.output->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
STL::stringstream kernel_key;
......@@ -1009,8 +1009,8 @@ void ConvImageCompute::DepthwiseConv2d() {
auto input_dims = param.x->dims();
auto paddings = *param.paddings;
auto strides = param.strides;
auto* input_image = param.x->data<uint16_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<uint16_t, cl::Image2D>();
auto* input_image = param.x->data<half_t, cl::Image2D>();
auto* filter_image = filter_gpu_image_.data<half_t, cl::Image2D>();
auto filter_dims = param.filter->dims();
auto output_dims = param.output->dims();
......@@ -1021,7 +1021,7 @@ void ConvImageCompute::DepthwiseConv2d() {
int filter_width = filter_dims[3];
int filter_height = filter_dims[2];
auto out_image_shape = InitImageDimInfoWith(output_dims);
auto* out_image = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_image = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
const bool has_bias = param.bias != nullptr;
......@@ -1080,7 +1080,7 @@ void ConvImageCompute::DepthwiseConv2d() {
const cl::Buffer* bias_buf = nullptr;
const cl::Image2D* bias_image = nullptr;
if (has_bias) {
bias_image = bias_gpu_image_.data<uint16_t, cl::Image2D>();
bias_image = bias_gpu_image_.data<half_t, cl::Image2D>();
}
auto& context = ctx_->As<OpenCLContext>();
......
......@@ -18,6 +18,7 @@
#include <string>
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/tensor.h"
......
......@@ -284,13 +284,13 @@ TEST(conv2d, compute_image2d_1x1) {
paddle::lite::CLImageConverterDefault default_convertor;
SHADOW_LOG << "set mapped input ...";
std::vector<uint16_t> x_image_v(
std::vector<half_t> x_image_v(
input_image_width * input_image_height * 4); // 4 : RGBA
std::vector<uint16_t> filter_image_v(
std::vector<half_t> filter_image_v(
filter_image_width * filter_image_height * 4); // 4 :RGBA
std::vector<uint16_t> bias_image_v(
std::vector<half_t> bias_image_v(
bias_image_width * bias_image_height * 4); // 4 : RGBA
std::vector<uint16_t> out_image_v(
std::vector<half_t> out_image_v(
out_image_width * out_image_height * 4); // 4 : RGBA
default_convertor.NCHWToImage(
......@@ -301,13 +301,13 @@ TEST(conv2d, compute_image2d_1x1) {
nw_convertor.NCHWToImage(
filter_v.data(), filter_image_v.data(), filter_dim);
auto* input_image2d = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image2d = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_v.data());
// assign filter as target arm
filter.Assign<float, lite::DDim, TARGET(kARM)>(filter_v.data(),
filter_dim);
// auto* filter_image2d =
// filter.mutable_data<uint16_t, cl::Image2D>(
// filter.mutable_data<half_t, cl::Image2D>(
// filter_image_width,
// filter_image_height,
// filter_image_v.data());
......@@ -357,12 +357,11 @@ TEST(conv2d, compute_image2d_1x1) {
SHADOW_LOG << "kernel launch ...";
kernel->Launch();
SHADOW_LOG << "mutable output ...";
auto* output_image2d =
output.mutable_data<uint16_t, cl::Image2D>(
out_image_width, out_image_height);
auto* output_image2d = output.mutable_data<half_t, cl::Image2D>(
out_image_width, out_image_height);
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
......@@ -375,14 +374,13 @@ TEST(conv2d, compute_image2d_1x1) {
"cl tensor.";
}
TargetWrapperCL::ImgcpySync(
out_image_v.data(),
output.data<uint16_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
TargetWrapperCL::ImgcpySync(out_image_v.data(),
output.data<half_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
cl_image2d_slice_pitch,
IoDirection::DtoH);
DDim out_image_shape =
default_convertor.InitImageDimInfoWith(output.dims());
......@@ -641,14 +639,14 @@ TEST(conv2d, compute_image2d_3x3) {
paddle::lite::CLImageConverterDefault default_convertor;
SHADOW_LOG << "set mapped input ...";
std::vector<uint16_t> x_image_v(
input_image_width * input_image_height * 4); // 4 :RGBA
std::vector<uint16_t> filter_image_v(
std::vector<half_t> x_image_v(input_image_width *
input_image_height * 4); // 4 :RGBA
std::vector<half_t> filter_image_v(
filter_image_width * filter_image_height * 4); // 4 : RGBA
std::vector<uint16_t> bias_image_v(
std::vector<half_t> bias_image_v(
bias_image_width * bias_image_height * 4); // 4 : RGBA
std::vector<uint16_t> out_image_v(
out_image_width * out_image_height * 4); // 4 :RGBA
std::vector<half_t> out_image_v(out_image_width *
out_image_height * 4); // 4 :RGBA
default_convertor.NCHWToImage(
input_v.data(), x_image_v.data(), input_dim);
......@@ -673,7 +671,7 @@ TEST(conv2d, compute_image2d_3x3) {
for (int i = 0; i < filter_image_v.size(); i++) {
SHADOW_LOG << "(" << i << ")" << filter_image_v[i];
}
auto* input_image2d = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image2d = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_v.data());
// assign filter as target arm
filter.Assign<float, lite::DDim, TARGET(kARM)>(filter_v.data(),
......@@ -714,11 +712,11 @@ TEST(conv2d, compute_image2d_3x3) {
SHADOW_LOG << "kernel launch ...";
kernel->Launch();
SHADOW_LOG << "mutable output ...";
auto* output_image2d = output.mutable_data<uint16_t, cl::Image2D>(
auto* output_image2d = output.mutable_data<half_t, cl::Image2D>(
out_image_width, out_image_height);
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
......@@ -732,7 +730,7 @@ TEST(conv2d, compute_image2d_3x3) {
}
TargetWrapperCL::ImgcpySync(out_image_v.data(),
output.data<uint16_t, cl::Image2D>(),
output.data<half_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
......@@ -987,14 +985,14 @@ TEST(conv2d, compute_image2d_5x5) {
paddle::lite::CLImageConverterDefault default_convertor;
SHADOW_LOG << "set mapped input ...";
std::vector<uint16_t> x_image_v(
input_image_width * input_image_height * 4); // 4 :RGBA
std::vector<uint16_t> filter_image_v(
std::vector<half_t> x_image_v(input_image_width *
input_image_height * 4); // 4 :RGBA
std::vector<half_t> filter_image_v(
filter_image_width * filter_image_height * 4); // 4 : RGBA
std::vector<uint16_t> bias_image_v(
std::vector<half_t> bias_image_v(
bias_image_width * bias_image_height * 4); // 4 : RGBA
std::vector<uint16_t> out_image_v(
out_image_width * out_image_height * 4); // 4 :RGBA
std::vector<half_t> out_image_v(out_image_width *
out_image_height * 4); // 4 :RGBA
default_convertor.NCHWToImage(
input_v.data(), x_image_v.data(), input_dim);
......@@ -1019,7 +1017,7 @@ TEST(conv2d, compute_image2d_5x5) {
for (int i = 0; i < filter_image_v.size(); i++) {
SHADOW_LOG << "(" << i << ")" << filter_image_v[i];
}
auto* input_image2d = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image2d = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_v.data());
// assign filter as target arm
filter.Assign<float, lite::DDim, TARGET(kARM)>(filter_v.data(),
......@@ -1060,11 +1058,11 @@ TEST(conv2d, compute_image2d_5x5) {
SHADOW_LOG << "kernel launch ...";
kernel->Launch();
SHADOW_LOG << "mutable output ...";
auto* output_image2d = output.mutable_data<uint16_t, cl::Image2D>(
auto* output_image2d = output.mutable_data<half_t, cl::Image2D>(
out_image_width, out_image_height);
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
......@@ -1078,7 +1076,7 @@ TEST(conv2d, compute_image2d_5x5) {
}
TargetWrapperCL::ImgcpySync(out_image_v.data(),
output.data<uint16_t, cl::Image2D>(),
output.data<half_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
......@@ -1325,13 +1323,13 @@ TEST(conv2d, compute_image2d_7x7) {
paddle::lite::CLImageConverterDefault default_convertor;
SHADOW_LOG << "set mapped input ...";
std::vector<uint16_t> x_image_v(
std::vector<half_t> x_image_v(
input_image_width * input_image_height * 4); // 4 : RGBA
std::vector<uint16_t> filter_image_v(
std::vector<half_t> filter_image_v(
filter_image_width * filter_image_height * 4); // 4 : RGBA
std::vector<uint16_t> bias_image_v(
std::vector<half_t> bias_image_v(
bias_image_width * bias_image_height * 4); // 4 : RGBA
std::vector<uint16_t> out_image_v(
std::vector<half_t> out_image_v(
out_image_width * out_image_height * 4); // 4 : RGBA
default_convertor.NCHWToImage(
......@@ -1357,7 +1355,7 @@ TEST(conv2d, compute_image2d_7x7) {
for (int i = 0; i < filter_image_v.size(); i++) {
SHADOW_LOG << "(" << i << ")" << filter_image_v[i];
}
auto* input_image2d = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image2d = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_v.data());
// assign filter as target arm
......@@ -1399,11 +1397,11 @@ TEST(conv2d, compute_image2d_7x7) {
SHADOW_LOG << "kernel launch ...";
kernel->Launch();
SHADOW_LOG << "mutable output ...";
auto* output_image2d = output.mutable_data<uint16_t, cl::Image2D>(
auto* output_image2d = output.mutable_data<half_t, cl::Image2D>(
out_image_width, out_image_height);
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
......@@ -1417,7 +1415,7 @@ TEST(conv2d, compute_image2d_7x7) {
}
TargetWrapperCL::ImgcpySync(out_image_v.data(),
output.data<uint16_t, cl::Image2D>(),
output.data<half_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
......
......@@ -253,14 +253,14 @@ TEST(depthwise_conv2d_basic, compute) {
paddle::lite::CLImageConverterDefault default_convertor;
VLOG(4) << "set mapped input ...";
std::vector<uint16_t> x_image_v(input_image_width *
input_image_height * 4); // 4 : RGBA
std::vector<uint16_t> filter_image_v(
std::vector<half_t> x_image_v(input_image_width * input_image_height *
4); // 4 : RGBA
std::vector<half_t> filter_image_v(
filter_image_width * filter_image_height * 4); // 4 : RGBA
std::vector<uint16_t> bias_image_v(
bias_image_width * bias_image_height * 4); // 4 : RGBA
std::vector<uint16_t> out_image_v(out_image_width * out_image_height *
4); // 4 : RGBA
std::vector<half_t> bias_image_v(bias_image_width *
bias_image_height * 4); // 4 : RGBA
std::vector<half_t> out_image_v(out_image_width * out_image_height *
4); // 4 : RGBA
default_convertor.NCHWToImage(
input_v.data(), x_image_v.data(), input_dim);
......@@ -270,9 +270,9 @@ TEST(depthwise_conv2d_basic, compute) {
nw_convertor.NCHWToImage(
filter_v.data(), filter_image_v.data(), filter_dim);
auto* input_image2d = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image2d = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_v.data());
auto* filter_image2d = filter.mutable_data<uint16_t, cl::Image2D>(
auto* filter_image2d = filter.mutable_data<half_t, cl::Image2D>(
filter_image_width, filter_image_height, filter_image_v.data());
if (bias_flag) {
......@@ -285,7 +285,7 @@ TEST(depthwise_conv2d_basic, compute) {
CLImageConverterFolder folder_convertor;
folder_convertor.NCHWToImage(
bias_v.data(), bias_image_v.data(), bias_dim);
auto* bias_data = bias.mutable_data<uint16_t, cl::Image2D>(
auto* bias_data = bias.mutable_data<half_t, cl::Image2D>(
bias_image_width, bias_image_height, bias_image_v.data());
}
......@@ -301,11 +301,11 @@ TEST(depthwise_conv2d_basic, compute) {
VLOG(4) << "kernel launch ...";
kernel->Launch();
VLOG(4) << "mutable output ...";
auto* output_image2d = output.mutable_data<uint16_t, cl::Image2D>(
auto* output_image2d = output.mutable_data<half_t, cl::Image2D>(
out_image_width, out_image_height);
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
......@@ -319,7 +319,7 @@ TEST(depthwise_conv2d_basic, compute) {
}
TargetWrapperCL::ImgcpySync(out_image_v.data(),
output.data<uint16_t, cl::Image2D>(),
output.data<half_t, cl::Image2D>(),
out_image_width,
out_image_height,
cl_image2d_row_pitch,
......@@ -434,11 +434,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
default_converter->InitImageDimInfoWith(input.dims());
LOG(INFO) << "input_image_shape = " << input_image_shape[0] << " "
<< input_image_shape[1];
std::vector<uint16_t> input_image_data(input_image_shape.production() *
4); // 4 : RGBA
std::vector<half_t> input_image_data(input_image_shape.production() *
4); // 4 : RGBA
default_converter->NCHWToImage(
input_v.data(), input_image_data.data(), input.dims());
auto* input_image = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image = input.mutable_data<half_t, cl::Image2D>(
input_image_shape[0], input_image_shape[1], input_image_data.data());
LOG(INFO) << "prepare kernel";
......@@ -447,11 +447,11 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
DDim filter_image_shape = nw_converter->InitImageDimInfoWith(filter.dims());
LOG(INFO) << "filter_image_shape = " << filter_image_shape[0] << " "
<< filter_image_shape[1];
std::vector<uint16_t> filter_image_data(filter_image_shape.production() *
4); // 4 : RGBA
std::vector<half_t> filter_image_data(filter_image_shape.production() *
4); // 4 : RGBA
nw_converter->NCHWToImage(
filter_v.data(), filter_image_data.data(), filter.dims());
auto* filter_image = filter.mutable_data<uint16_t, cl::Image2D>(
auto* filter_image = filter.mutable_data<half_t, cl::Image2D>(
filter_image_shape[0], filter_image_shape[1], filter_image_data.data());
LOG(INFO) << "launch";
......@@ -460,13 +460,13 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
default_converter->InitImageDimInfoWith(output.dims());
LOG(INFO) << "output_image_shape = " << output_image_shape[0] << " "
<< output_image_shape[1];
auto* output_image = output.mutable_data<uint16_t, cl::Image2D>(
auto* output_image = output.mutable_data<half_t, cl::Image2D>(
output_image_shape[0], output_image_shape[1]);
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
......@@ -491,8 +491,7 @@ TEST(depthwise_conv2d_image2d_fp16, compute) {
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
uint16_t* output_image_data =
new uint16_t[output_image_shape.production() * 4];
half_t* output_image_data = new half_t[output_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(output_image_data,
output_image,
output_image_shape[0],
......
......@@ -78,10 +78,10 @@ void ElementwiseAddImageCompute::Run() {
default_convertor.InitImageDimInfoWith(out->dims()); // w, h
auto y_img_shape = default_convertor.InitImageDimInfoWith(y->dims());
auto* x_img = x->data<uint16_t, cl::Image2D>();
auto* y_img = y->data<uint16_t, cl::Image2D>();
auto* out_img = out->mutable_data<uint16_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
auto* x_img = x->data<half_t, cl::Image2D>();
auto* y_img = y->data<half_t, cl::Image2D>();
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
......
......@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/operators/op_params.h"
#include "lite/utils/cp_logging.h"
......
......@@ -158,9 +158,9 @@ TEST(elementwise_add_image, compute) {
auto x_img_shape = default_convertor.InitImageDimInfoWith(x_dim); // w, h
auto x_img_w = x_img_shape[0];
auto x_img_h = x_img_shape[1];
std::vector<uint16_t> x_img_v(x_img_w * x_img_h * 4); // 4: RGBA
std::vector<half_t> x_img_v(x_img_w * x_img_h * 4); // 4: RGBA
default_convertor.NCHWToImage(x_v.data(), x_img_v.data(), x_dim);
eleadd_x.mutable_data<uint16_t, cl::Image2D>(
eleadd_x.mutable_data<half_t, cl::Image2D>(
x_img_w, x_img_h, x_img_v.data());
// y
......@@ -169,10 +169,10 @@ TEST(elementwise_add_image, compute) {
auto y_img_shape = default_convertor.InitImageDimInfoWith(y_dim); // w, h
auto y_img_w = y_img_shape[0];
auto y_img_h = y_img_shape[1];
std::vector<uint16_t> y_img_v(y_img_shape[0] * y_img_shape[1] *
4); // 4: RGBA
std::vector<half_t> y_img_v(y_img_shape[0] * y_img_shape[1] *
4); // 4: RGBA
default_convertor.NCHWToImage(y_v.data(), y_img_v.data(), y_dim);
eleadd_y.mutable_data<uint16_t, cl::Image2D>(
eleadd_y.mutable_data<half_t, cl::Image2D>(
y_img_w, y_img_h, y_img_v.data());
// out
......@@ -180,10 +180,10 @@ TEST(elementwise_add_image, compute) {
default_convertor.InitImageDimInfoWith(out_dim); // w, h
auto out_img_w = out_img_shape[0];
auto out_img_h = out_img_shape[1];
eleadd_out.mutable_data<uint16_t, cl::Image2D>(out_img_w, out_img_h);
eleadd_out.mutable_data<half_t, cl::Image2D>(out_img_w, out_img_h);
std::vector<uint16_t> out_img_v(out_img_w * out_img_h * 4);
fill_data<uint16_t>(
std::vector<half_t> out_img_v(out_img_w * out_img_h * 4);
fill_data<half_t>(
out_img_v.data(), out_img_v.size(), 0); // fill with zero value
std::vector<float> out_v(out_dim.production());
......@@ -235,7 +235,7 @@ TEST(elementwise_add_image, compute) {
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
TargetWrapperCL::ImgcpySync(out_img_v.data(),
eleadd_out.data<uint16_t, cl::Image2D>(),
eleadd_out.data<half_t, cl::Image2D>(),
out_img_w,
out_img_h,
cl_image2d_row_pitch,
......
......@@ -14,6 +14,7 @@
#include <memory>
#include <string>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_image_converter.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
......@@ -95,10 +96,10 @@ class ElementwiseMulImageCompute
default_convertor.InitImageDimInfoWith(out->dims()); // w, h
auto y_img_shape = default_convertor.InitImageDimInfoWith(y->dims());
auto* x_img = x->data<uint16_t, cl::Image2D>();
auto* y_img = y->data<uint16_t, cl::Image2D>();
auto* out_img = out->mutable_data<uint16_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
auto* x_img = x->data<half_t, cl::Image2D>();
auto* y_img = y->data<half_t, cl::Image2D>();
auto* out_img = out->mutable_data<half_t, cl::Image2D>(out_img_shape[0],
out_img_shape[1]);
VLOG(4) << "x_img_shape[w,h]:" << x_img_width << " " << x_img_height;
VLOG(4) << "y_img_shape[w,h]:" << y_img_shape[0] << " " << y_img_shape[1];
......
......@@ -151,9 +151,9 @@ TEST(elementwise_mul_image, compute) {
auto x_img_shape = default_convertor.InitImageDimInfoWith(x_dim); // w, h
auto x_img_w = x_img_shape[0];
auto x_img_h = x_img_shape[1];
std::vector<uint16_t> x_img_v(x_img_w * x_img_h * 4); // 4: RGBA
std::vector<half_t> x_img_v(x_img_w * x_img_h * 4); // 4: RGBA
default_convertor.NCHWToImage(x_v.data(), x_img_v.data(), x_dim);
elemul_x.mutable_data<uint16_t, cl::Image2D>(
elemul_x.mutable_data<half_t, cl::Image2D>(
x_img_w, x_img_h, x_img_v.data());
// y
......@@ -162,10 +162,10 @@ TEST(elementwise_mul_image, compute) {
auto y_img_shape = default_convertor.InitImageDimInfoWith(y_dim); // w, h
auto y_img_w = y_img_shape[0];
auto y_img_h = y_img_shape[1];
std::vector<uint16_t> y_img_v(y_img_shape[0] * y_img_shape[1] *
4); // 4: RGBA
std::vector<half_t> y_img_v(y_img_shape[0] * y_img_shape[1] *
4); // 4: RGBA
default_convertor.NCHWToImage(y_v.data(), y_img_v.data(), y_dim);
elemul_y.mutable_data<uint16_t, cl::Image2D>(
elemul_y.mutable_data<half_t, cl::Image2D>(
y_img_w, y_img_h, y_img_v.data());
// out
......@@ -173,10 +173,10 @@ TEST(elementwise_mul_image, compute) {
default_convertor.InitImageDimInfoWith(out_dim); // w, h
auto out_img_w = out_img_shape[0];
auto out_img_h = out_img_shape[1];
elemul_out.mutable_data<uint16_t, cl::Image2D>(out_img_w, out_img_h);
elemul_out.mutable_data<half_t, cl::Image2D>(out_img_w, out_img_h);
std::vector<uint16_t> out_img_v(out_img_w * out_img_h * 4);
fill_data<uint16_t>(
std::vector<half_t> out_img_v(out_img_w * out_img_h * 4);
fill_data<half_t>(
out_img_v.data(), out_img_v.size(), 0); // fill with zero value
std::vector<float> out_v(out_dim.production());
......@@ -218,7 +218,7 @@ TEST(elementwise_mul_image, compute) {
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
TargetWrapperCL::ImgcpySync(out_img_v.data(),
elemul_out.data<uint16_t, cl::Image2D>(),
elemul_out.data<half_t, cl::Image2D>(),
out_img_w,
out_img_h,
cl_image2d_row_pitch,
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/op_registry.h"
#include "lite/kernels/opencl/elementwise_add_image_compute.h"
......
......@@ -15,6 +15,7 @@
#include <memory>
#include <string>
#include "lite/api/paddle_place.h"
#include "lite/backends/opencl/cl_half.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
#include "lite/core/target_wrapper.h"
......@@ -47,7 +48,7 @@ class LayoutComputeBufferChwToImageDefault
auto* x_data = param.x->data<float, cl::Buffer>();
auto x_dims = param.x->dims();
auto image_shape = InitImageDimInfoWith(x_dims);
auto* y_data = param.y->mutable_data<uint16_t, cl::Image2D>(
auto* y_data = param.y->mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
auto y_dims = param.y->dims();
......@@ -146,7 +147,7 @@ class LayoutComputeImageDefaultToBufferChw
void Run() override {
auto& param = Param<param_t>();
auto* x_data = param.x->data<uint16_t, cl::Image2D>();
auto* x_data = param.x->data<half_t, cl::Image2D>();
auto x_dims = param.x->dims();
auto* y_data = param.y->mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
auto y_dims = param.y->dims();
......
......@@ -79,7 +79,7 @@ TEST(layout_ImageDefault, compute) {
auto* y_data = y.mutable_data<float, cl::Buffer>(TARGET(kOpenCL));
auto image_shape =
paddle::lite::kernels::opencl::InitImageDimInfoWith(x_dim);
auto* y_image_data = y_image.mutable_data<uint16_t, cl::Image2D>(
auto* y_image_data = y_image.mutable_data<half_t, cl::Image2D>(
image_shape["width"], image_shape["height"]);
auto* mapped_x = static_cast<float*>(TargetWrapperCL::Map(
x_data, 0, sizeof(float) * x_dim.production()));
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -45,14 +46,13 @@ class NearestInterpComputeImageDefault
auto& param = *param_.get_mutable<param_t>();
const auto& x_dims = param.X->dims();
auto* x_buf =
param.X->data<uint16_t,
cl::Image2D>(); // use uint16_t represents half float
param.X->data<half_t,
cl::Image2D>(); // half_t represents a half-precision float
auto image_shape = InitImageDimInfoWith(x_dims);
auto* out_buf =
param.Out->mutable_data<uint16_t, cl::Image2D>( // use uint16_t
// represents half float
image_shape["width"],
image_shape["height"]);
auto* out_buf = param.Out->mutable_data<half_t, cl::Image2D>( // half_t
// represents a half-precision float
image_shape["width"],
image_shape["height"]);
const auto& y_dims = param.Out->dims(); // output dims; used below to derive scale_h / scale_w
float scale_h = y_dims[2] / x_dims[2];
float scale_w = y_dims[3] / x_dims[3];
......
......@@ -166,11 +166,11 @@ TEST(nearest_interp_image2d, compute) {
mapped_y[i] = static_cast<int>(0);
}
auto *nearest_interp_in_data =
nearest_interp_in.mutable_data<uint16_t, cl::Image2D>(
nearest_interp_in.mutable_data<half_t, cl::Image2D>(
nearest_interp_image2d_shape["width"],
nearest_interp_image2d_shape["height"]);
auto *nearest_interp_out_data =
nearest_interp_out.mutable_data<uint16_t, cl::Image2D>(
nearest_interp_out.mutable_data<half_t, cl::Image2D>(
y_dim[3], y_dim[2]);
// set context and kernel args
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -67,13 +68,13 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto& context = ctx_->As<OpenCLContext>();
CHECK(context.cl_context() != nullptr);
auto* x_img = param.x->data<uint16_t, cl::Image2D>();
auto* x_img = param.x->data<half_t, cl::Image2D>();
LOG(INFO) << "x_image" << x_img;
auto out_image_shape = InitImageDimInfoWith(out_dims);
LOG(INFO) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
auto* out_img = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
LOG(INFO) << "out_image" << out_img;
......
......@@ -123,23 +123,22 @@ TEST(pool2d_image2d, compute) {
DDim x_image_shape = default_converter->InitImageDimInfoWith(in_dim);
LOG(INFO) << "x_image_shape = " << x_image_shape[0] << " "
<< x_image_shape[1];
std::vector<uint16_t> x_image_data(x_image_shape.production() *
4); // 4 : RGBA
std::vector<half_t> x_image_data(x_image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage(input_v.data(), x_image_data.data(), in_dim);
auto* x_image = x.mutable_data<uint16_t, cl::Image2D>(
auto* x_image = x.mutable_data<half_t, cl::Image2D>(
x_image_shape[0], x_image_shape[1], x_image_data.data());
LOG(INFO) << "x_image:" << x_image;
DDim out_image_shape = default_converter->InitImageDimInfoWith(out_dim);
LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
<< out_image_shape[1];
auto* out_image = out.mutable_data<uint16_t, cl::Image2D>(out_image_shape[0],
out_image_shape[1]);
auto* out_image = out.mutable_data<half_t, cl::Image2D>(out_image_shape[0],
out_image_shape[1]);
LOG(INFO) << "out_image:" << out_image;
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
......@@ -154,7 +153,7 @@ TEST(pool2d_image2d, compute) {
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
uint16_t* out_image_data = new uint16_t[out_image_shape.production() * 4];
half_t* out_image_data = new half_t[out_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data,
out_image,
out_image_shape[0],
......
......@@ -12,6 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -51,7 +52,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
const int64_t& input_image_width = input_image_shape.at("width");
const int64_t& input_image_height = input_image_shape.at("height");
const cl::Image2D* const x_image = x->data<uint16_t, cl::Image2D>();
const cl::Image2D* const x_image = x->data<half_t, cl::Image2D>();
const std::vector<int>& shape_vct = param.shape_vct;
Tensor* const output = param.output;
......@@ -60,7 +61,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
const std::map<std::string, size_t>& out_image_shape =
InitImageDimInfoWith(out_dims);
cl::Image2D* const out_image = output->mutable_data<uint16_t, cl::Image2D>(
cl::Image2D* const out_image = output->mutable_data<half_t, cl::Image2D>(
out_image_shape.at("width"), out_image_shape.at("height"));
LOG(INFO) << "out_dims= " << out_dims;
......
......@@ -152,13 +152,13 @@ TEST(reshape_opencl, compute) {
}
paddle::lite::CLImageConverterDefault default_convertor;
std::vector<uint16_t> x_image_data(input_image_width * input_image_height *
4); // 4 : RGBA
std::vector<half_t> x_image_data(input_image_width * input_image_height *
4); // 4 : RGBA
LOG(INFO) << "set mapped input ...";
default_convertor.NCHWToImage(input_v_data, x_image_data.data(), input_dim);
auto* input_image = input.mutable_data<uint16_t, cl::Image2D>(
auto* input_image = input.mutable_data<half_t, cl::Image2D>(
input_image_width, input_image_height, x_image_data.data());
LOG(INFO) << "prepare kernel ready";
......@@ -168,7 +168,7 @@ TEST(reshape_opencl, compute) {
DDim out_image_shape = default_converter.InitImageDimInfoWith(output_dim);
LOG(INFO) << "out_image_shape = " << out_image_shape[0] << " "
<< out_image_shape[1];
auto* out_image = output.mutable_data<uint16_t, cl::Image2D>(
auto* out_image = output.mutable_data<half_t, cl::Image2D>(
out_image_shape[0], out_image_shape[1]);
VLOG(4) << "out_dims= " << output_dim;
......@@ -185,7 +185,7 @@ TEST(reshape_opencl, compute) {
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_image);
if (it != wait_list->end()) {
......@@ -196,9 +196,9 @@ TEST(reshape_opencl, compute) {
LOG(FATAL) << "Could not find the sync event for the target cl tensor.";
}
uint16_t* out_image_data = new uint16_t[out_image_shape.production() * 4];
half_t* out_image_data = new half_t[out_image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data,
output.data<uint16_t, cl::Image2D>(),
output.data<half_t, cl::Image2D>(),
out_image_shape[0],
out_image_shape[1],
cl_image2d_row_pitch,
......
......@@ -13,6 +13,7 @@
// limitations under the License.
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"
......@@ -43,7 +44,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
void Run() override {
const auto& param = *param_.get_mutable<param_t>();
const auto& in_dims = param.x->dims();
auto* x_img = param.x->data<uint16_t, cl::Image2D>();
auto* x_img = param.x->data<half_t, cl::Image2D>();
const float scale = param.scale;
const float bias = param.bias;
......@@ -51,7 +52,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto out_image_shape = InitImageDimInfoWith(in_dims);
LOG(INFO) << "out_image_shape = " << out_image_shape["width"] << " "
<< out_image_shape["height"];
auto* out_img = param.output->mutable_data<uint16_t, cl::Image2D>(
auto* out_img = param.output->mutable_data<half_t, cl::Image2D>(
out_image_shape["width"], out_image_shape["height"]);
LOG(INFO) << "out_image" << out_img;
......
......@@ -77,19 +77,19 @@ TEST(scale_image2d_fp32, compute) {
CLImageConverterDefault* default_converter = new CLImageConverterDefault();
DDim image_shape = default_converter->InitImageDimInfoWith(in_dim);
LOG(INFO) << "image_shape = " << image_shape[0] << " " << image_shape[1];
std::vector<uint16_t> x_image_data(image_shape.production() * 4); // 4 : RGBA
std::vector<half_t> x_image_data(image_shape.production() * 4); // 4 : RGBA
default_converter->NCHWToImage(input_v.data(), x_image_data.data(), in_dim);
auto* x_image = x.mutable_data<uint16_t, cl::Image2D>(
auto* x_image = x.mutable_data<half_t, cl::Image2D>(
image_shape[0], image_shape[1], x_image_data.data());
LOG(INFO) << "x_image:" << x_image;
auto* out_image =
out.mutable_data<uint16_t, cl::Image2D>(image_shape[0], image_shape[1]);
out.mutable_data<half_t, cl::Image2D>(image_shape[0], image_shape[1]);
LOG(INFO) << "out_image:" << out_image;
kernel->Launch();
auto* wait_list = context->As<OpenCLContext>().cl_wait_list();
auto* out_ptr = param.output->data<uint16_t, cl::Image2D>();
auto* out_ptr = param.output->data<half_t, cl::Image2D>();
auto it = wait_list->find(out_ptr);
if (it != wait_list->end()) {
VLOG(4) << "--- Find the sync event for the target cl tensor. ---";
......@@ -104,7 +104,7 @@ TEST(scale_image2d_fp32, compute) {
const size_t cl_image2d_row_pitch{0};
const size_t cl_image2d_slice_pitch{0};
uint16_t* out_image_data = new uint16_t[image_shape.production() * 4];
half_t* out_image_data = new half_t[image_shape.production() * 4];
TargetWrapperCL::ImgcpySync(out_image_data,
out_image,
image_shape[0],
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册