diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 16e63e433e64038ea15f75895eef3d288d043f06..d317aac8594b4c9045f06a596fb3f9292604fcd6 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -23,7 +23,6 @@ limitations under the License. */ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/framework/op_call_stack.h" -#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/transfer_scope_cache.h" #include "paddle/fluid/framework/unused_var_check.h" diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index db26c66958140b9900c8848818bd313bffaf2e13..b2d55babc7e1c11ca8efa9d3a95d92e90c4967c4 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -16,7 +16,6 @@ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" -#include "paddle/fluid/framework/pten_utils.h" #include "paddle/fluid/imperative/infer_shape_context.h" #include "paddle/pten/common/scalar.h" #include "paddle/utils/small_vector.h" diff --git a/paddle/pten/api/include/creation.h b/paddle/pten/api/include/creation.h index d7311e6cd283b4a17b9bcfee1cda603a3bf9970d..9795d88f81880b9f84491ff4c9ebe9428391510f 100644 --- a/paddle/pten/api/include/creation.h +++ b/paddle/pten/api/include/creation.h @@ -14,5 +14,26 @@ #pragma once +#include "paddle/pten/api/include/infershape.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/kernels/cpu/creation.h" #include "paddle/pten/kernels/cuda/creation.h" + +namespace pten { + +// TODO(YuanRisheng) This function name should be same as User API name. +// TODO(zyfncg) Automatic code generation +template +DenseTensor FillAnyLike(const ContextT& dev_ctx, + const DenseTensor& x, + const Scalar& val) { + auto out_meta = UnchangedInferShape(x.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + FillAnyLike(dev_ctx, x, val, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/api/include/linalg.h b/paddle/pten/api/include/linalg.h index d9798c3a2e0a8117bb84829200ad08deab0953a9..0d4c7a60fbc1456d0ec4dcc06f1e3af51117e3b6 100644 --- a/paddle/pten/api/include/linalg.h +++ b/paddle/pten/api/include/linalg.h @@ -15,5 +15,24 @@ #pragma once // See Note: [ How do we organize the kernel directory ] +#include "paddle/pten/api/include/infershape.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/kernels/cpu/linalg.h" #include "paddle/pten/kernels/cuda/linalg.h" + +namespace pten { + +template +DenseTensor Dot(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + auto out_meta = DotInferShape(x.meta(), y.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + Dot(dev_ctx, x, y, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/api/include/manipulation.h b/paddle/pten/api/include/manipulation.h index f2acad96499696ada9ec2e7d1c2b99380133c97a..1f867686a6eb7ee006ed6979939ca2e2e2531ad8 100644 --- a/paddle/pten/api/include/manipulation.h +++ b/paddle/pten/api/include/manipulation.h @@ -15,5 +15,25 @@ #pragma once // See Note: [ How do we organize the kernel directory ] +#include "paddle/pten/api/include/infershape.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/kernels/cpu/manipulation.h" #include "paddle/pten/kernels/cuda/manipulation.h" + +namespace pten { + +template +DenseTensor Flatten(const ContextT& dev_ctx, + const DenseTensor& x, + int start_axis, + int stop_axis) { + auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + Flatten(dev_ctx, x, start_axis, stop_axis, &dense_out); + return dense_out; +} + +} // namespace pten diff --git a/paddle/pten/api/include/math.h b/paddle/pten/api/include/math.h index 5145c823a5c6e07462477a3e04ba68e177a67dd3..fa512e8d6db0d7f8dfc28b642ecd91e51066b648 100644 --- a/paddle/pten/api/include/math.h +++ b/paddle/pten/api/include/math.h @@ -15,5 +15,62 @@ limitations under the License. */ #pragma once // See Note: [ How do we organize the kernel directory ] +#include "paddle/pten/api/include/infershape.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" #include "paddle/pten/kernels/cpu/math.h" #include "paddle/pten/kernels/cuda/math.h" + +namespace pten { + +template +DenseTensor Sign(const ContextT& dev_ctx, const DenseTensor& x) { + auto out_meta = UnchangedInferShape(x.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + Sign(dev_ctx, x, &dense_out); + return dense_out; +} + +template +DenseTensor Mean(const ContextT& dev_ctx, const DenseTensor& x) { + auto out_meta = ReductionInferShape(x.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + Mean(dev_ctx, x, &dense_out); + return dense_out; +} + +template +DenseTensor Scale(const ContextT& dev_ctx, + const DenseTensor& x, + float scale, + float bias, + bool bias_after_scale) { + auto out_meta = UnchangedInferShape(x.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + Scale(dev_ctx, x, scale, bias, bias_after_scale, &dense_out); + return dense_out; +} + +template +DenseTensor Scale(const ContextT& dev_ctx, + const DenseTensor& x, + const DenseTensor& scale, + float bias, + bool bias_after_scale) { + auto out_meta = UnchangedInferShape(x.meta()); + const auto allocator = + std::make_shared( + dev_ctx.GetPlace()); + pten::DenseTensor dense_out(allocator, out_meta); + ScaleHost(dev_ctx, x, scale, bias, bias_after_scale, &dense_out); + return dense_out; +} +} // namespace pten diff --git a/paddle/pten/hapi/lib/utils/tensor_utils.cc b/paddle/pten/hapi/lib/utils/tensor_utils.cc index a55c50db761a61f241fb74e6a4c482a836569896..f7641f424f4910d69c9ca71ddfa7968979a66c81 100644 --- a/paddle/pten/hapi/lib/utils/tensor_utils.cc +++ b/paddle/pten/hapi/lib/utils/tensor_utils.cc @@ -45,6 +45,7 @@ std::unique_ptr MakePtenDenseTensor( SetLoD(&meta.lod, src.lod()); auto shared_storage = pten::make_intrusive(src.Holder(), src.offset()); + return std::make_unique(std::move(shared_storage), std::move(meta)); } diff --git a/paddle/pten/kernels/cpu/manipulation.cc b/paddle/pten/kernels/cpu/manipulation.cc index c436e14e0caab74caf203fc1ef5ee3b7e2631987..87c76149f127feda0a9afdd00634d7767b0e5f7a 100644 --- a/paddle/pten/kernels/cpu/manipulation.cc +++ b/paddle/pten/kernels/cpu/manipulation.cc @@ -24,10 +24,9 @@ void Flatten(const CPUContext& dev_ctx, int start_axis, int stop_axis, DenseTensor* out) { - auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); + auto out_dims = out->dims(); pten::Copy(dev_ctx, x, out); - out->set_lod(out_meta.lod); - out->Resize(out_meta.dims); + out->Resize(out_dims); } // TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate diff --git a/paddle/pten/kernels/cuda/manipulation.cu b/paddle/pten/kernels/cuda/manipulation.cu index 43614f859c58bf3435d41249a9882f782bddb603..38111f2b8c02fd37ae4063fb6d752000c6e486fd 100644 --- a/paddle/pten/kernels/cuda/manipulation.cu +++ b/paddle/pten/kernels/cuda/manipulation.cu @@ -24,10 +24,9 @@ void Flatten(const CUDAContext& dev_ctx, int start_axis, int stop_axis, DenseTensor* out) { - auto out_meta = FlattenInferShape(x.meta(), start_axis, stop_axis); + auto out_dims = out->dims(); pten::Copy(dev_ctx, x, out); - out->set_lod(out_meta.lod); - out->Resize(out_meta.dims); + out->Resize(out_dims); } // TODO(yuanrisheng): this kernel is for training and xshape is a Intermediate diff --git a/paddle/pten/tests/CMakeLists.txt b/paddle/pten/tests/CMakeLists.txt index 3dc779380527f14368da9c14d979a346e330023d..3d2da6a5afdd1a6b780074d6b272a93346c898f8 100644 --- a/paddle/pten/tests/CMakeLists.txt +++ b/paddle/pten/tests/CMakeLists.txt @@ -12,3 +12,4 @@ cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS linalg_api pten_hapi_utils) cc_test(test_fill_api SRCS test_fill_api.cc DEPS creation_api pten_hapi_utils) cc_test(test_copy_api SRCS test_copy_api.cc DEPS utils_cpu pten_hapi_utils) cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS utils_cpu manipulation_api pten_hapi_utils) +cc_test(test_scale_api SRCS test_scale_api.cc DEPS math_api pten_hapi_utils) diff --git a/paddle/pten/tests/test_dot_api.cc b/paddle/pten/tests/test_dot_api.cc index 69e785904fe3c98fbc1e812f0b32729785b368f9..5401b66544473940938e8fdf55ab86ff580b2463 100644 --- a/paddle/pten/tests/test_dot_api.cc +++ b/paddle/pten/tests/test_dot_api.cc @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/hapi/lib/utils/allocator.h" +#include "paddle/pten/api/include/linalg.h" + PT_DECLARE_MODULE(LinalgCPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -82,3 +84,55 @@ TEST(API, dot) { ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f); ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f); } + +// TODO(YuanRisheng) This unitest should be created in other file. +// It is convenient to make compilation decoupling. +TEST(DEV_API, dot) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + + pten::DenseTensor dense_y(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 10}), + pten::DataLayout::NCHW)); + auto* dense_y_data = dense_y.mutable_data(); + + float sum[3] = {0.0, 0.0, 0.0}; + for (size_t i = 0; i < 3; ++i) { + for (size_t j = 0; j < 10; ++j) { + dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0; + dense_y_data[i * 10 + j] = (i * 10 + j) * 1.0; + sum[i] += (i * 10 + j) * (i * 10 + j) * 1.0; + } + } + + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto out = pten::Dot( + *(static_cast(dev_ctx)), + dense_x, + dense_y); + + // 3. check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + auto expect_result = sum; + auto actual_result0 = out.data()[0]; + auto actual_result1 = out.data()[1]; + auto actual_result2 = out.data()[2]; + ASSERT_NEAR(expect_result[0], actual_result0, 1e-6f); + ASSERT_NEAR(expect_result[1], actual_result1, 1e-6f); + ASSERT_NEAR(expect_result[2], actual_result2, 1e-6f); +} diff --git a/paddle/pten/tests/test_fill_api.cc b/paddle/pten/tests/test_fill_api.cc index 4f93e03aca2f3d5cc299b3db5348c79537829f83..5a788226086dcc6953357d32fec966493e2d6baa 100644 --- a/paddle/pten/tests/test_fill_api.cc +++ b/paddle/pten/tests/test_fill_api.cc @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/hapi/lib/utils/allocator.h" +#include "paddle/pten/api/include/creation.h" + PT_DECLARE_MODULE(CreationCPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -131,3 +133,38 @@ TEST(API, ones_like) { ASSERT_EQ(actual_result[i], 1); } } + +TEST(DEV_API, fill_any_like) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + dense_x_data[0] = 0; + float val = 1.0; + + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto out = pten::FillAnyLike( + *(static_cast(dev_ctx)), + dense_x, + val); + + // 3. check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.dims()[0], 3); + ASSERT_EQ(out.numel(), 6); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + auto* actual_result = out.data(); + for (auto i = 0; i < 6; i++) { + ASSERT_NEAR(actual_result[i], val, 1e-6f); + } +} diff --git a/paddle/pten/tests/test_flatten_api.cc b/paddle/pten/tests/test_flatten_api.cc index 48d2205c2ff4846de258d3d93fb2a4571eb56e34..dfb777678a94d05fbbf4ad401aa89cfdfad2decd 100644 --- a/paddle/pten/tests/test_flatten_api.cc +++ b/paddle/pten/tests/test_flatten_api.cc @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/hapi/lib/utils/allocator.h" +#include "paddle/pten/api/include/manipulation.h" + PT_DECLARE_MODULE(ManipulationCPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -70,3 +72,47 @@ TEST(API, flatten) { } ASSERT_EQ(value_equal, true); } + +TEST(DEV_API, flatten) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x( + alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 2, 2, 3}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + + for (int i = 0; i < dense_x.numel(); i++) { + dense_x_data[i] = i; + } + int start_axis = 1, stop_axis = 2; + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto out = pten::Flatten( + *(static_cast(dev_ctx)), + dense_x, + start_axis, + stop_axis); + + // 3. check result + std::vector expect_shape = {3, 4, 3}; + ASSERT_EQ(out.dims()[0], expect_shape[0]); + ASSERT_EQ(out.dims()[1], expect_shape[1]); + ASSERT_EQ(out.dims()[2], expect_shape[2]); + ASSERT_EQ(out.numel(), 36); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + bool value_equal = true; + auto* dense_out_data = out.data(); + for (int i = 0; i < dense_x.numel(); i++) { + if (std::abs(dense_x_data[i] - dense_out_data[i]) > 1e-6f) + value_equal = false; + } + ASSERT_EQ(value_equal, true); +} diff --git a/paddle/pten/tests/test_mean_api.cc b/paddle/pten/tests/test_mean_api.cc index ee8388671b7ebe5dc642d7ad3bb8a853d3bcbf56..b3da90659d005a0d9fbcd38b0cbfa947276ad03c 100644 --- a/paddle/pten/tests/test_mean_api.cc +++ b/paddle/pten/tests/test_mean_api.cc @@ -21,6 +21,8 @@ limitations under the License. */ #include "paddle/pten/core/kernel_registry.h" #include "paddle/pten/hapi/lib/utils/allocator.h" +#include "paddle/pten/api/include/math.h" + PT_DECLARE_MODULE(MathCPU); #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -67,3 +69,36 @@ TEST(API, mean) { auto actual_result = dense_out->data()[0]; ASSERT_NEAR(expect_result, actual_result, 1e-6f); } + +TEST(DEV_API, mean) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + + float sum = 0.0; + for (size_t i = 0; i < 12; ++i) { + dense_x_data[i] = i * 1.0; + sum += i * 1.0; + } + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + // 2. test API + auto out = pten::Mean( + *(static_cast(dev_ctx)), dense_x); + + // 3. check result + ASSERT_EQ(out.dims().size(), 1); + ASSERT_EQ(out.numel(), 1); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + auto expect_result = sum / 12; + auto actual_result = out.data()[0]; + ASSERT_NEAR(expect_result, actual_result, 1e-6f); +} diff --git a/paddle/pten/tests/test_scale_api.cc b/paddle/pten/tests/test_scale_api.cc new file mode 100644 index 0000000000000000000000000000000000000000..9f80d6d2cc126bb4687626f267d537601bca7ff2 --- /dev/null +++ b/paddle/pten/tests/test_scale_api.cc @@ -0,0 +1,118 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include +#include + +#include "paddle/pten/hapi/include/math.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/kernel_registry.h" +#include "paddle/pten/hapi/lib/utils/allocator.h" + +#include "paddle/pten/api/include/math.h" + +PT_DECLARE_MODULE(MathCPU); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) +PT_DECLARE_MODULE(MathCUDA); +#endif + +namespace framework = paddle::framework; +using DDim = paddle::framework::DDim; + +TEST(DEV_API, scale) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); + + auto* dense_x_data = dense_x.mutable_data(); + for (size_t i = 0; i < 12; ++i) { + dense_x_data[i] = i * 1.0; + } + float scale = 2; + float bias = 1; + bool bias_after_scale = true; + + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto out = pten::Scale( + *(static_cast(dev_ctx)), + dense_x, + scale, + bias, + bias_after_scale); + + // 3. check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.numel(), 12); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + auto expect_result = 23; + auto actual_result = out.data()[11]; + ASSERT_NEAR(expect_result, actual_result, 1e-6f); +} + +TEST(DEV_API, scale_host) { + // 1. create tensor + const auto alloc = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor dense_x(alloc, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({3, 4}), + pten::DataLayout::NCHW)); + auto* dense_x_data = dense_x.mutable_data(); + for (size_t i = 0; i < 12; ++i) { + dense_x_data[i] = i * 1.0; + } + const auto alloc2 = std::make_shared( + paddle::platform::CPUPlace()); + pten::DenseTensor scale(alloc2, + pten::DenseTensorMeta(pten::DataType::FLOAT32, + framework::make_ddim({1}), + pten::DataLayout::NCHW)); + scale.mutable_data()[0] = 2; + float bias = 1; + bool bias_after_scale = true; + + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + auto* dev_ctx = pool.Get(paddle::platform::CPUPlace()); + + // 2. test API + auto out = pten::Scale( + *(static_cast(dev_ctx)), + dense_x, + scale, + bias, + bias_after_scale); + + // 3. check result + ASSERT_EQ(out.dims().size(), 2); + ASSERT_EQ(out.numel(), 12); + ASSERT_EQ(out.meta().type, pten::DataType::FLOAT32); + ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW); + + auto expect_result = 23; + auto actual_result = out.data()[11]; + ASSERT_NEAR(expect_result, actual_result, 1e-6f); +}