提交 5d5d06c2 编写于 作者: L liuqi

Fix buffer to image bug for half type and refactor some cl apis.

上级 3272a989
...@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) { ...@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) {
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(output->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(output->dtype()));
auto addn_kernel = runtime->BuildKernel("addn", "add2", built_options); auto addn_kernel = runtime->BuildKernel("addn", "add2", built_options);
const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(addn_kernel); const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(addn_kernel);
......
...@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()( ...@@ -30,7 +30,7 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm", built_options); auto bm_kernel = runtime->BuildKernel("batch_norm", "batch_norm", built_options);
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(bm_kernel); const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(bm_kernel);
......
...@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer, ...@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
} }
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(DataTypeToEnum<T>::value)); if (buffer->dtype() == image->dtype()) {
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(DataTypeToEnum<T>::value)); built_options.emplace("-DDATA_TYPE=" + DtToCLDt(DataTypeToEnum<T>::value));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(DataTypeToEnum<T>::value));
} else {
built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum<T>::value));
built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum<T>::value));
}
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
string kernel_name; string kernel_name;
switch (type) { switch (type) {
......
...@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input, ...@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input,
MACE_CHECK(input_batch == batch); MACE_CHECK(input_batch == batch);
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
built_options.emplace("-DSTRIDE=" + ToString(stride)); built_options.emplace("-DSTRIDE=" + ToString(stride));
if (bias != nullptr) { if (bias != nullptr) {
built_options.emplace("-DBIAS"); built_options.emplace("-DBIAS");
......
...@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter, ...@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const index_t width_blocks = RoundUpDiv<index_t, 5>(width); const index_t width_blocks = RoundUpDiv<index_t, 5>(width);
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
built_options.emplace(bias != nullptr ? "-DBIAS" : ""); built_options.emplace(bias != nullptr ? "-DBIAS" : "");
built_options.emplace("-DSTRIDE=" + ToString(stride)); built_options.emplace("-DSTRIDE=" + ToString(stride));
if (fused_relu) { if (fused_relu) {
......
...@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input, ...@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
built_options.emplace(stride == 1 ? "-DSTRIDE_1" : ""); built_options.emplace(stride == 1 ? "-DSTRIDE_1" : "");
built_options.emplace(bias != nullptr ? "-DBIAS" : ""); built_options.emplace(bias != nullptr ? "-DBIAS" : "");
auto conv_kernel = runtime->BuildKernel("depthwise_conv_3x3", "depthwise_conv_3x3", built_options); auto conv_kernel = runtime->BuildKernel("depthwise_conv_3x3", "depthwise_conv_3x3", built_options);
......
...@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */ ...@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
} }
std::string DataTypeToCLType(const DataType dt) { std::string DtToCLDt(const DataType dt) {
// Returns the OpenCL C scalar type name that exactly matches the mace
// DataType (DT_FLOAT -> "float", DT_HALF -> "half"); used to build the
// -DDATA_TYPE kernel compile option when no widening to float is wanted.
// Any other DataType is a hard failure (LOG(FATAL)).
switch (dt) {
case DT_FLOAT:
return "float";
case DT_HALF:
return "half";
default:
LOG(FATAL) << "Unsupported data type";
return "";
}
}
// Returns the one-letter suffix used by OpenCL typed built-ins
// (e.g. read_imagef vs read_imageh, vloadN/vstoreN variants) for the
// given mace DataType: DT_FLOAT -> "f", DT_HALF -> "h".
// Any other DataType is a hard failure (LOG(FATAL)).
std::string DtToCLCMDDt(const DataType dt) {
  if (dt == DT_FLOAT) {
    return "f";
  }
  if (dt == DT_HALF) {
    return "h";
  }
  LOG(FATAL) << "Not supported data type for opencl cmd data type";
  return "";
}
std::string DtToUpstreamCLDt(const DataType dt) {
switch (dt) { switch (dt) {
case DT_FLOAT: case DT_FLOAT:
case DT_HALF: case DT_HALF:
return "float"; return "float";
case DT_UINT8:
return "uchar";
case DT_INT8:
return "char";
case DT_DOUBLE:
return "double";
case DT_INT32:
return "int";
case DT_UINT32:
return "int";
case DT_UINT16:
return "ushort";
case DT_INT16:
return "short";
case DT_INT64:
return "long";
default: default:
LOG(FATAL) << "Unsupported data type"; LOG(FATAL) << "Unsupported data type";
return ""; return "";
} }
} }
std::string DataTypeToOPENCLCMDDataType(const DataType dt) { std::string DtToUpstreamCLCMDDt(const DataType dt) {
switch (dt) { switch (dt) {
case DT_FLOAT: case DT_FLOAT:
case DT_HALF: case DT_HALF:
......
...@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */ ...@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
const BufferType type, const BufferType type,
std::vector<size_t> &image_shape); std::vector<size_t> &image_shape);
std::string DataTypeToOPENCLCMDDataType(const DataType dt); std::string DtToCLCMDDt(const DataType dt);
std::string DataTypeToCLType(const DataType dt); std::string DtToUpstreamCLCMDDt(const DataType dt);
std::string DtToCLDt(const DataType dt);
std::string DtToUpstreamCLDt(const DataType dt);
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
......
...@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input, ...@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
built_options.emplace(stride[0] == 1 ? "-DSTRIDE_1" : ""); built_options.emplace(stride[0] == 1 ? "-DSTRIDE_1" : "");
auto pooling_kernel = runtime->BuildKernel("pooling", "pooling3", built_options); auto pooling_kernel = runtime->BuildKernel("pooling", "pooling3", built_options);
...@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input, ...@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
auto pooling_kernel = runtime->BuildKernel("pooling", "poolingn", built_options); auto pooling_kernel = runtime->BuildKernel("pooling", "poolingn", built_options);
const uint32_t lws[3] = {1, 8, 128}; const uint32_t lws[3] = {1, 8, 128};
......
...@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input, ...@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
auto program = runtime->program(); auto program = runtime->program();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(input->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(input->dtype()));
if (max_limit_ < 0) { if (max_limit_ < 0) {
auto relu_kernel = runtime->BuildKernel("relu", "relu", built_options); auto relu_kernel = runtime->BuildKernel("relu", "relu", built_options);
const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(relu_kernel); const uint32_t lws = runtime->GetKernelMaxWorkGroupSize(relu_kernel);
......
...@@ -41,8 +41,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()( ...@@ -41,8 +41,8 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, T>::operator()(
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
auto dt = DataTypeToEnum<T>::value; auto dt = DataTypeToEnum<T>::value;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(dt)); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt));
built_options.emplace("-DCMD_DATA_TYPE=" + DataTypeToOPENCLCMDDataType(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt));
auto rb_kernel = runtime->BuildKernel("resize_bilinear", "resize_bilinear_nocache", built_options); auto rb_kernel = runtime->BuildKernel("resize_bilinear", "resize_bilinear_nocache", built_options);
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(rb_kernel); const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(rb_kernel);
......
...@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te ...@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te
Tensor *batch_tensor) { Tensor *batch_tensor) {
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
std::set<std::string> built_options; std::set<std::string> built_options;
built_options.emplace("-DDATA_TYPE=" + DataTypeToCLType(space_tensor->dtype())); built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(space_tensor->dtype()));
auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options); auto s2b_kernel = runtime->BuildKernel("space_to_batch", "space_to_batch", built_options);
uint32_t idx = 0; uint32_t idx = 0;
......
...@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) { ...@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) {
} }
TEST(BufferToImageTest, ArgHalfSmall) { TEST(BufferToImageTest, ArgHalfSmall) {
TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {1}); TestBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
} }
TEST(BufferToImageTest, ArgMedia) { TEST(BufferToImageTest, ArgMedia) {
...@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) { ...@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) {
TEST(BufferToImageTest, Filter3x3Large) { TEST(BufferToImageTest, Filter3x3Large) {
TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::FILTER, {3, 3, 128, 256}); TestBidirectionTransform<DeviceType::OPENCL, float>(kernels::FILTER, {3, 3, 128, 256});
} }
// Round-trips random float data through an image whose element type is T:
// BufferToImage ("Input" -> "B2IOutput") followed by ImageToBuffer
// ("B2IOutput" -> "I2BOutput"), then checks the result is close to the
// original input. With T = half this exercises the mixed-precision
// (float buffer -> half image) conversion path.
// `type` is a kernels::BufferType value selecting the image layout;
// `input_shape` is the NHWC-style shape of the source buffer.
template<DeviceType D, typename T>
void TestDiffTypeBidirectionTransform(const int type, const std::vector<index_t> &input_shape) {
OpsTestNet net;
// Buffer -> image; the "T" int arg sets the image storage data type,
// which here may differ from the float input buffer's dtype.
OpDefBuilder("BufferToImage", "BufferToImageTest")
.Input("Input")
.Output("B2IOutput")
.AddIntArg("buffer_type", type)
.AddIntArg("T", DataTypeToEnum<T>::value)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", input_shape);
// Run
net.RunOp(D);
// Image -> buffer, inverting the transform above.
OpDefBuilder("ImageToBuffer", "ImageToBufferTest")
.Input("B2IOutput")
.Output("I2BOutput")
.AddIntArg("buffer_type", type)
.AddIntArg("T", DataTypeToEnum<T>::value)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
// Check: loose 1e-2 tolerance, since a float -> half -> float round trip
// loses precision.
ExpectTensorNear<float, T>(*net.GetOutput("Input"), *net.GetOutput("I2BOutput"), 1e-2);
}
// Regression test for the half-type buffer-to-image fix: round-trips an
// 11-element ARGUMENT buffer of floats through a half-typed image.
TEST(BufferToImageTest, ArgFloatToHalfSmall) {
TestDiffTypeBidirectionTransform<DeviceType::OPENCL, half>(kernels::ARGUMENT, {11});
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册