// FunctionTest.h
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Function.h"
16 17
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
18
#include "paddle/math/tests/TensorCheck.h"
H
hedaoyuan 已提交
19
#include "paddle/testing/TestUtil.h"
20 21 22

namespace paddle {

H
hedaoyuan 已提交
23 24
typedef std::shared_ptr<BufferArg> BufferArgPtr;

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
namespace test {
template <DeviceType DType>
struct Allocator;

template <>
struct Allocator<DEVICE_TYPE_CPU> {
  using type = CpuMemoryHandle;
};

template <>
struct Allocator<DEVICE_TYPE_GPU> {
  using type = GpuMemoryHandle;
};

// Copy argument1 to argument2
template <DeviceType DType1, DeviceType DType2>
class CopyArgument {
public:
  void operator()(const BufferArg& arg1, BufferArg& arg2) {
    CHECK_EQ(arg1.valueType(), arg2.valueType());
    CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements());

    if (arg1.valueType() == VALUE_TYPE_INT32) {
      IVectorPtr vector1 =
          IVector::create((int*)arg1.data(),
                          arg1.shape().getElements(),
                          DType1 == DEVICE_TYPE_CPU ? false : true);
      IVectorPtr vector2 =
          IVector::create((int*)arg2.data(),
                          arg2.shape().getElements(),
                          DType2 == DEVICE_TYPE_CPU ? false : true);
      vector2->copyFrom(*vector1);
    } else {
      VectorPtr vector1 =
          Vector::create((real*)arg1.data(),
                         arg1.shape().getElements(),
                         DType1 == DEVICE_TYPE_CPU ? false : true);
      VectorPtr vector2 =
          Vector::create((real*)arg2.data(),
                         arg2.shape().getElements(),
                         DType2 == DEVICE_TYPE_CPU ? false : true);
      vector2->copyFrom(*vector1);
    }
  }
};
}  // namespace test

/**
 * \brief A class for comparing two Functions of different implementations.
 *        For example, it can be used to check that the CPU and GPU
 *        implementations of a function produce consistent results.
 *
 * Use case:
 *  // Initializes a test object; the corresponding cpu and gpu Function
 *  // are constructed according to FunctionName and FuncConfig.
 *  CpuGpuFuncCompare test(FunctionName, FuncConfig);
 *  // Prepare inputs and outputs arguments.
 *  // Here the input and output can not contain real data,
 *  // only contains the argument type and shape.
 *  test.addInputs(input1);
 *  test.addInputs(input2);
 *  test.addOutputs(output1);
 *  test.addOutputs(output2);
 *  // Run.
 *  // According to the type and shape of the arguments (inputs_/outputs_),
 *  // automatically initializes the arguments required by the cpu and gpu
 *  // functions (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_),
 *  // calls the CPU and GPU Function to calculate the results, and
 *  // compares the CPU and GPU calculation results for consistency.
 *  test.run();
 */
template <DeviceType DType1, DeviceType DType2>
class Compare2Function {
98
public:
99 100 101 102 103 104 105 106 107 108 109 110 111 112
  typedef typename test::Allocator<DType1>::type Allocator1;
  typedef typename test::Allocator<DType2>::type Allocator2;
  typedef typename Tensor<real, DType1>::Vector Vector1;
  typedef typename Tensor<real, DType2>::Vector Vector2;
  typedef typename Tensor<real, DType1>::SparseMatrix SparseMatrix1;
  typedef typename Tensor<real, DType2>::SparseMatrix SparseMatrix2;

  Compare2Function(const std::string& name1,
                   const std::string& name2,
                   const FuncConfig& config)
      : function1_(FunctionBase::funcRegistrar_.createByType(name1)),
        function2_(FunctionBase::funcRegistrar_.createByType(name2)) {
    function1_->init(config);
    function2_->init(config);
H
hedaoyuan 已提交
113 114
  }

115
  ~Compare2Function() {}
H
hedaoyuan 已提交
116 117 118 119 120

  // input need only contains shape, do not contains data.
  void addInputs(const BufferArg& input) {
    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
121 122
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
H
hedaoyuan 已提交
123

124 125 126 127
    func1Inputs_.emplace_back(std::make_shared<BufferArg>(
        func1Memory_.back()->getBuf(), input.valueType(), input.shape()));
    func2Inputs_.emplace_back(std::make_shared<BufferArg>(
        func2Memory_.back()->getBuf(), input.valueType(), input.shape()));
H
hedaoyuan 已提交
128 129
  }

130 131 132 133 134 135
  // assume one copy of sequence is shared by different SequenceArgs
  void addSequence(const SequenceIdArg& input) {
    CHECK_EQ(input.shape().ndims(), 1UL);
    size_t batchSize = input.shape()[0];
    size_t numSeqs = batchSize / 10 + 1;
    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
136 137 138 139 140 141
    func1Memory_.emplace_back(std::make_shared<Allocator1>(sizeId));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(sizeId));
    seq1_ = std::make_shared<SequenceIdArg>(func1Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
    seq2_ = std::make_shared<SequenceIdArg>(func2Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
142
    /// init sequence Id
143
    initArg(*seq1_, batchSize);
144

145
    copyArg_(*seq1_, *seq2_);
146 147 148 149 150
  }

  void addInputs(const SequenceArg& input) {
    CHECK_EQ(input.shape().ndims(), 2UL);
    size_t batchSize = input.shape()[0];
151
    if (!seq1_ || !seq2_) {  // sequence not exist
152 153 154 155 156
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }

    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
157 158
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
159 160

    /// SequenceArg
161 162
    func1Inputs_.emplace_back(
        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
163 164
                                      input.valueType(),
                                      input.shape(),
165 166 167
                                      *seq1_));
    func2Inputs_.emplace_back(
        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
168 169
                                      input.valueType(),
                                      input.shape(),
170
                                      *seq2_));
171 172
  }

H
hedaoyuan 已提交
173
  // output need only contains shape, do not contains data.
X
xutianbing 已提交
174
  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
H
hedaoyuan 已提交
175 176
    size_t size =
        output.shape().getElements() * sizeOfValuType(output.valueType());
177 178
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
H
hedaoyuan 已提交
179

180 181
    func1Outputs_.emplace_back(
        std::make_shared<BufferArg>(func1Memory_.back()->getBuf(),
182 183 184
                                    output.valueType(),
                                    output.shape(),
                                    argType));
185 186
    func2Outputs_.emplace_back(
        std::make_shared<BufferArg>(func2Memory_.back()->getBuf(),
187 188 189
                                    output.valueType(),
                                    output.shape(),
                                    argType));
190 191
  }

192 193
  /// add and init output sparse matrix
  void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
194
    sparse1_ = std::make_shared<SparseMatrix1>(
195 196 197 198 199 200
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
        static_cast<SparseValueType>(output.dataType()),
        static_cast<SparseFormat>(output.dataFormat()));

201
    sparse2_ = std::make_shared<SparseMatrix2>(
202 203 204 205 206
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
        static_cast<SparseValueType>(output.dataType()),
        static_cast<SparseFormat>(output.dataFormat()));
207 208 209

    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
210 211
    sparse1_->randomizeUniform();
    sparse2_->copyFrom(*sparse1_, stream);
212 213
    hl_stream_synchronize(stream);

214 215 216 217
    func1Outputs_.emplace_back(
        std::make_shared<SparseMatrixArg>(*sparse1_, argType));
    func2Outputs_.emplace_back(
        std::make_shared<SparseMatrixArg>(*sparse2_, argType));
218 219
  }

220 221 222
  void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
    CHECK_EQ(output.shape().ndims(), 2UL);
    size_t batchSize = output.shape()[0];
H
hedaoyuan 已提交
223

224
    if (!seq1_ || !seq2_) {  // sequence not exist
225 226
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }
H
hedaoyuan 已提交
227
    size_t size =
228
        output.shape().getElements() * sizeOfValuType(output.valueType());
229 230
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
H
hedaoyuan 已提交
231

232
    /// SequenceArg
233 234
    func1Outputs_.emplace_back(
        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
235 236
                                      output.valueType(),
                                      output.shape(),
237
                                      *seq1_,
238
                                      argType));
239 240
    func2Outputs_.emplace_back(
        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
241 242
                                      output.valueType(),
                                      output.shape(),
243
                                      *seq2_,
244
                                      argType));
H
hedaoyuan 已提交
245
  }
H
hedaoyuan 已提交
246

247
  void addInputs(const SparseMatrixArg& input) {
248
    sparse1_ = std::make_shared<SparseMatrix1>(
249 250 251 252 253 254
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
        static_cast<SparseValueType>(input.dataType()),
        static_cast<SparseFormat>(input.dataFormat()));

255
    sparse2_ = std::make_shared<SparseMatrix2>(
256 257 258 259 260
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
        static_cast<SparseValueType>(input.dataType()),
        static_cast<SparseFormat>(input.dataFormat()));
261 262 263

    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
264 265
    sparse1_->randomizeUniform();
    sparse2_->copyFrom(*sparse1_, stream);
266 267
    hl_stream_synchronize(stream);

268 269
    func1Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse1_));
    func2Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse2_));
270 271
  }

H
hedaoyuan 已提交
272 273
  void run() {
    // prepare cpu/gpu arguments
H
hedaoyuan 已提交
274
    initInputs();
H
hedaoyuan 已提交
275

276
    initOutputs();
H
hedaoyuan 已提交
277
    // function calculate
H
hedaoyuan 已提交
278 279 280 281 282 283 284
    auto callFunction = [](FunctionBase* function,
                           std::vector<BufferArgPtr>& inputs,
                           std::vector<BufferArgPtr>& outputs) {
      BufferArgs inArgs;
      BufferArgs outArgs;
      for (auto arg : inputs) {
        inArgs.addArg(*arg);
H
hedaoyuan 已提交
285
      }
H
hedaoyuan 已提交
286 287
      for (auto arg : outputs) {
        outArgs.addArg(*arg);
288
      }
H
hedaoyuan 已提交
289
      function->calc(inArgs, outArgs);
290 291
    };

292 293
    callFunction(function1_.get(), func1Inputs_, func1Outputs_);
    callFunction(function2_.get(), func2Inputs_, func2Outputs_);
294

295
    // check outputs
H
hedaoyuan 已提交
296
    compareOutputs();
297 298
  }

299
  std::shared_ptr<FunctionBase> getCpuFunction() const { return function1_; }
300

301
  std::shared_ptr<FunctionBase> getGpuFunction() const { return function2_; }
302

H
hedaoyuan 已提交
303
protected:
304 305
  // only init cpu argument, gpu argument copy from cpu argument.
  void initArg(BufferArg& arg) {
306
    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
307 308 309 310 311
    vector.uniform(0.001, 1);
  }

  void initArg(SequenceArg& arg) {
    /// init only matrix
312
    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330
    vector.uniform(0.001, 1);
  }

  void initArg(SequenceIdArg& arg, size_t batchSize) {
    size_t numSeqs = arg.numSeqs();
    int* buf = reinterpret_cast<int*>(arg.data());
    int pos = 0;
    size_t maxLen = 2 * batchSize / numSeqs;
    for (int i = 0; i < (int)numSeqs; ++i) {
      int len = 1 + uniformRandom(std::min<int64_t>(
                        maxLen, batchSize - pos - numSeqs + i));
      buf[i] = pos;
      pos += len;
      VLOG(1) << " len=" << len;
    }
    buf[numSeqs] = batchSize;
  }

H
hedaoyuan 已提交
331
  void initInputs() {
332 333
    for (size_t i = 0; i < func1Inputs_.size(); i++) {
      if (func1Inputs_[i]->isSparseArg()) {
334 335 336
        continue;  /// sparse matrix already init
      }

337 338
      if (func1Inputs_[i]->isSequenceArg()) {
        initArg(dynamic_cast<SequenceArg&>(*func1Inputs_[i]));
339
      } else {
340
        initArg(*func1Inputs_[i]);
341
      }
H
hedaoyuan 已提交
342

343
      copyArg_(*func1Inputs_[i], *func2Inputs_[i]);
H
hedaoyuan 已提交
344
    }
H
hedaoyuan 已提交
345 346
  }

347
  void initOutputs() {
348 349
    for (size_t i = 0; i < func1Outputs_.size(); i++) {
      if (func1Outputs_[i]->isSparseArg()) {
350
        continue;  /// sparse matrix already init
351 352
      }

353 354
      if (func1Outputs_[i]->isSequenceArg()) {
        initArg(dynamic_cast<SequenceArg&>(*func1Outputs_[i]));
355
      } else {
356
        initArg(*func1Outputs_[i]);
357
      }
358

359
      copyArg_(*func1Outputs_[i], *func2Outputs_[i]);
360 361 362
    }
  }

H
hedaoyuan 已提交
363
  void compareOutputs() {
364
    for (size_t i = 0; i < func1Outputs_.size(); i++) {
H
hedaoyuan 已提交
365
      // TODO, Need a BufferCheck used to compare the two buffers.
366 367
      const auto cpu = func1Outputs_[i];
      const auto gpu = func2Outputs_[i];
368
      CHECK_EQ(cpu->numElements(), gpu->numElements());
369 370
      Vector1 cpuVector(cpu->numElements(), (real*)cpu->data());
      Vector2 gpuVector(gpu->numElements(), (real*)gpu->data());
H
hedaoyuan 已提交
371 372
      autotest::TensorCheckErr(cpuVector, gpuVector);
    }
H
hedaoyuan 已提交
373 374
  }

375
protected:
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397
  std::shared_ptr<FunctionBase> function1_;
  std::shared_ptr<FunctionBase> function2_;
  std::vector<std::shared_ptr<Allocator1>> func1Memory_;
  std::vector<std::shared_ptr<Allocator2>> func2Memory_;
  std::vector<BufferArgPtr> func1Inputs_;
  std::vector<BufferArgPtr> func1Outputs_;
  std::vector<BufferArgPtr> func2Inputs_;
  std::vector<BufferArgPtr> func2Outputs_;
  std::shared_ptr<SparseMatrix1> sparse1_;
  std::shared_ptr<SparseMatrix2> sparse2_;
  std::shared_ptr<SequenceIdArg> seq1_;
  std::shared_ptr<SequenceIdArg> seq2_;
  test::CopyArgument<DType1, DType2> copyArg_;
};

class CpuGpuFuncCompare
    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> {
public:
  CpuGpuFuncCompare(const std::string& name, const FuncConfig& config)
      : Compare2Function(name + "-CPU", name + "-GPU", config) {}

  ~CpuGpuFuncCompare() {}
398 399 400
};

}  // namespace paddle