// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lite/kernels/cuda/layout_compute.h" #include #include #include namespace paddle { namespace lite { namespace kernels { namespace cuda { #define IN(n, c, h, w) \ input_data[w + h * input_w + c * input_h * input_w + \ n * input_c * input_h * input_w] #define OUT(n, c, h, w) \ output_data[w + h * output_w + c * output_h * output_w + \ n * output_c * output_h * output_w] template void nchw2nhwc_ref(lite::Tensor* input, lite::Tensor* output) { auto* input_data = input->data(); auto* output_data = output->mutable_data(); int input_n = input->dims()[0]; int input_c = input->dims()[1]; int input_h = input->dims()[2]; int input_w = input->dims()[3]; int output_c = output->dims()[1]; int output_h = output->dims()[2]; int output_w = output->dims()[3]; for (int n = 0; n < input_n; ++n) { for (int c = 0; c < input_c; ++c) { for (int h = 0; h < input_h; ++h) { for (int w = 0; w < input_w; ++w) { OUT(n, h, w, c) = IN(n, c, h, w); } } } } } #undef IN #undef OUT #define IN(n, h, w, c) \ input_data[c + w * input_c + h * input_w * input_c + \ n * input_h * input_w * input_c] #define OUT(n, h, w, c) \ output_data[c + w * output_c + h * output_w * output_c + \ n * output_h * output_w * output_c] template void nhwc2nchw_ref(lite::Tensor* input, lite::Tensor* output) { auto* input_data = input->data(); auto* output_data = output->mutable_data(); int input_n = input->dims()[0]; int input_h = input->dims()[1]; int input_w = input->dims()[2]; int input_c = input->dims()[3]; int output_h = output->dims()[1]; int output_w = output->dims()[2]; int output_c = output->dims()[3]; for (int n = 0; n < input_n; ++n) { for (int c = 0; c < input_c; ++c) { for (int h = 0; h < input_h; ++h) { for (int w = 0; w < input_w; ++w) { OUT(n, c, h, w) = IN(n, h, w, c); } } } } } template void test_reformat(LayOutCompute* layout_kernel, bool nchw2nhwc) { std::unique_ptr ctx(new KernelContext); auto& context = ctx->As(); operators::LayoutParam param; lite::Tensor x, x_cpu, x_ref; lite::Tensor out, out_cpu, out_ref; int N = 5, C = 6, H = 7, W = 8; if (nchw2nhwc) { x.Resize({N, C, H, W}); out.Resize({N, H, W, C}); x_cpu.Resize({N, C, H, W}); out_cpu.Resize({N, H, W, C}); x_ref.Resize({N, C, H, W}); out_ref.Resize({N, H, W, C}); } else { x.Resize({N, H, W, C}); out.Resize({N, C, H, W}); x_cpu.Resize({N, H, W, C}); out_cpu.Resize({N, C, H, W}); x_ref.Resize({N, H, W, C}); out_ref.Resize({N, C, H, W}); } auto* x_cpu_data = x_cpu.mutable_data(); auto* out_cpu_data = out_cpu.mutable_data(); auto* x_ref_data = x_ref.mutable_data(); for (int i = 0; i < x_cpu.numel(); ++i) { x_cpu_data[i] = static_cast((i + 1) % 127); x_ref_data[i] = static_cast((i + 1) % 127); } x.Assign(x_cpu_data, x_cpu.dims()); param.x = &x; param.y = &out; cudaStream_t stream; cudaStreamCreate(&stream); context.SetExecStream(stream); layout_kernel->SetParam(param); layout_kernel->SetContext(std::move(ctx)); layout_kernel->Launch(); cudaDeviceSynchronize(); auto* out_data = out.mutable_data(TARGET(kCUDA)); CopySync( out_cpu_data, out_data, sizeof(Dtype) * out.numel(), IoDirection::DtoH); if (nchw2nhwc) { nchw2nhwc_ref(&x_ref, &out_ref); } else { nhwc2nchw_ref(&x_ref, &out_ref); } auto* out_ref_data = out_ref.mutable_data(); for (int i = 0; i < out.numel(); i++) { EXPECT_NEAR(static_cast(out_cpu_data[i]), static_cast(out_ref_data[i]), 1e-5); } } TEST(normal, nchw2nhwc) { LayOutCompute* layout_k = new NCHWToNHWCCompute(); test_reformat(layout_k, true); delete layout_k; } /* TEST(normal, nhwc2nchw) { LayOutCompute * layout_k = new NHWCToNCHWCompute(); test_reformat(layout_k, false); delete layout_k; } TEST(normal, nchw2nhwcint8) { LayOutCompute * layout_k = new NCHWToNHWCCompute(); test_reformat(layout_k, true); delete layout_k; } TEST(normal, nhwc2nchwint8) { LayOutCompute * layout_k = new NHWCToNCHWCompute(); test_reformat(layout_k, false); delete layout_k; } */ } // namespace cuda } // namespace kernels } // namespace lite } // namespace paddle