// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "lite/kernels/cuda/elementwise_add_compute.h" #include #include #include namespace paddle { namespace lite { namespace kernels { namespace cuda { using Tensor = lite::Tensor; static void ElementwiseAddRef(float* x, float* y, float* out, int num) { for (int i = 0; i < num; ++i) { out[i] = x[i] + y[i]; // LOG(INFO) << x[i] << " + " << y[i] << " = " << out[i]; } } TEST(elementwise_add, normal) { ElementwiseAddCompute elementwise_add_kernel; std::unique_ptr ctx(new KernelContext); auto& context = ctx->As(); operators::ElementwiseParam param; Tensor x, y, out; Tensor x_cpu, y_cpu, out_cpu; Tensor x_ref, y_ref, out_ref; const int n = 1; const int c = 3; const int h = 2000; const int w = 2000; x.Resize({n, c, h, w}); y.Resize({n, c, h, w}); out.Resize({n, c, h, w}); x_cpu.Resize({n, c, h, w}); y_cpu.Resize({n, c, h, w}); out_cpu.Resize({n, c, h, w}); x_ref.Resize({n, c, h, w}); y_ref.Resize({n, c, h, w}); out_ref.Resize({n, c, h, w}); auto* x_data = x.mutable_data(TARGET(kCUDA)); auto* y_data = y.mutable_data(TARGET(kCUDA)); auto* out_data = out.mutable_data(TARGET(kCUDA)); auto* x_cpu_data = x_cpu.mutable_data(); auto* y_cpu_data = y_cpu.mutable_data(); auto* out_cpu_data = out_cpu.mutable_data(); auto* x_ref_data = x_ref.mutable_data(); auto* y_ref_data = y_ref.mutable_data(); auto* out_ref_data = out_ref.mutable_data(); for (int i = 0; i < x_cpu.numel(); ++i) { x_cpu_data[i] = i + 5.0; x_ref_data[i] = i + 5.0; } for (int i = 0; i < y_cpu.numel(); ++i) { y_cpu_data[i] = i - 5.0; y_ref_data[i] = i - 5.0; } x.Assign(x_cpu_data, x_cpu.dims()); y.Assign(y_cpu_data, y_cpu.dims()); param.X = &x; param.Y = &y; param.Out = &out; elementwise_add_kernel.SetParam(param); cudaStream_t stream; cudaStreamCreate(&stream); context.SetExecStream(stream); elementwise_add_kernel.SetContext(std::move(ctx)); elementwise_add_kernel.Launch(); cudaDeviceSynchronize(); CopySync( out_cpu_data, out_data, sizeof(float) * out.numel(), IoDirection::DtoH); ElementwiseAddRef(x_ref_data, y_ref_data, out_ref_data, out.numel()); for (int i = 0; i < out.numel(); i++) { EXPECT_NEAR(out_cpu_data[i], out_ref_data[i], 1e-5); } } } // namespace cuda } // namespace kernels } // namespace lite } // namespace paddle