/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <memory>
#include <vector>

#include "paddle/pten/api/include/api.h"

#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/backends/gpu/gpu_context.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
#include "paddle/pten/kernels/copy_kernel.h"

// See Note [ Why still include the fluid headers? ]
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = pten::framework::DDim;

TEST(API, matmul_cpu) {
  // 1. create tensor
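  // Build two 3x3 float32 host tensors backed by a DefaultAllocator on CPUPlace.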
  const auto alloc = std::make_unique<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());
  auto dense_x = std::make_shared<pten::DenseTensor>(
      alloc.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto* dense_x_data =
      dense_x->mutable_data<float>(paddle::platform::CPUPlace());

  auto dense_y = std::make_shared<pten::DenseTensor>(
      alloc.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));
  auto* dense_y_data =
      dense_y->mutable_data<float>(paddle::platform::CPUPlace());

  for (size_t i = 0; i < 9; ++i) {
    dense_x_data[i] = 1.0;
    dense_y_data[i] = 2.0;
  }
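  // Every element of x is 1.0 and every element of y is 2.0, so each element
  // of the 3x3 product is 3 * 1.0 * 2.0 = 6.0.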
  std::vector<float> sum(9, 6.0);

  paddle::experimental::Tensor x(dense_x);
  paddle::experimental::Tensor y(dense_y);

  // 2. test API
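  // Both transpose flags are passed as false, so this computes the plain product x * y.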
  auto out = paddle::experimental::matmul(x, y, false, false);

  // 3. check result
  ASSERT_EQ(out.dims().size(), 2);
  ASSERT_EQ(out.dims()[0], 3);
  ASSERT_EQ(out.dims()[1], 3);
  ASSERT_EQ(out.numel(), 9);
  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out.initialized(), true);

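  // Unwrap the returned Tensor to its DenseTensor implementation to read the raw output buffer.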
  auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());

  for (size_t i = 0; i < 9; i++) {
    ASSERT_NEAR(sum[i], dense_out->data<float>()[i], 1e-6f);
  }
}

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
TEST(API, matmul_cuda) {
  // Prepare CPU Dense Tensor
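  // The inputs are first built and filled on the host, then copied onto the GPU below.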
  const auto alloc_cpu =
      std::make_unique<paddle::experimental::DefaultAllocator>(
          paddle::platform::CPUPlace());
  auto ref_x = std::make_shared<pten::DenseTensor>(
      alloc_cpu.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto* ref_x_data = ref_x->mutable_data<float>(paddle::platform::CPUPlace());

  auto ref_y = std::make_shared<pten::DenseTensor>(
      alloc_cpu.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));
  auto* ref_y_data = ref_y->mutable_data<float>(paddle::platform::CPUPlace());

  for (size_t i = 0; i < 9; ++i) {
    ref_x_data[i] = 1.0;
    ref_y_data[i] = 2.0;
  }
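  // As in the CPU test, every element of the 3x3 product is expected to be 3 * 1.0 * 2.0 = 6.0.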
  std::vector<float> sum(9, 6.0);

  // 1. create tensor
  const auto alloc_cuda =
      std::make_unique<paddle::experimental::DefaultAllocator>(
          paddle::platform::CUDAPlace());
  auto dense_x = std::make_shared<pten::DenseTensor>(
      alloc_cuda.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

  auto dense_y = std::make_shared<pten::DenseTensor>(
      alloc_cuda.get(),
      pten::DenseTensorMeta(pten::DataType::FLOAT32,
                            pten::framework::make_ddim({3, 3}),
                            pten::DataLayout::NCHW));

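  // Grab the GPU device context from the global pool; it drives the host-to-device copies below.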
  auto& pool = paddle::platform::DeviceContextPool::Instance();
  auto place = paddle::platform::CUDAPlace();
  auto* dev_ctx = static_cast<const pten::GPUContext*>(pool.GetByPlace(place));

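  // Stage the reference host data onto the GPU tensors before calling the API.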
  pten::Copy(*dev_ctx, *ref_x.get(), false, dense_x.get());
  pten::Copy(*dev_ctx, *ref_y.get(), false, dense_y.get());

  paddle::experimental::Tensor x(dense_x);
  paddle::experimental::Tensor y(dense_y);

  // 2. test API
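  // Same call as the CPU test; with GPU inputs the GPU kernel is selected.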
  auto out = paddle::experimental::matmul(x, y, false, false);

  // 3. check result
  ASSERT_EQ(out.dims().size(), 2);
  ASSERT_EQ(out.dims()[0], 3);
  ASSERT_EQ(out.dims()[1], 3);
  ASSERT_EQ(out.numel(), 9);
  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out.initialized(), true);

  auto dense_out = std::dynamic_pointer_cast<pten::DenseTensor>(out.impl());

  auto ref_out = std::make_shared<pten::DenseTensor>(
      alloc_cpu.get(),
      pten::DenseTensorMeta(
          pten::DataType::FLOAT32, out.dims(), pten::DataLayout::NCHW));

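  // Copy the GPU result back into a host tensor so its values can be compared.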
  pten::Copy(*dev_ctx, *dense_out.get(), false, ref_out.get());

  for (size_t i = 0; i < 9; i++) {
    ASSERT_NEAR(sum[i], ref_out->data<float>()[i], 1e-6f);
  }
}

#endif

}  // namespace tests
}  // namespace paddle