/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include "Function.h"
#include "paddle/math/Matrix.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/math/tests/TensorCheck.h"
#include "paddle/testing/TestUtil.h"

namespace paddle {

typedef std::shared_ptr<BufferArg> BufferArgPtr;

namespace test {
template <DeviceType DType>
struct Allocator;

template <>
struct Allocator<DEVICE_TYPE_CPU> {
  using type = CpuMemoryHandle;
};

template <>
struct Allocator<DEVICE_TYPE_GPU> {
  using type = GpuMemoryHandle;
};

// Copy the contents of argument1 into argument2.
template <DeviceType DType1, DeviceType DType2>
class CopyArgument {
public:
  void operator()(const BufferArg& arg1, BufferArg& arg2) {
    CHECK_EQ(arg1.valueType(), arg2.valueType());
    CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements());

    if (arg1.valueType() == VALUE_TYPE_INT32) {
      IVectorPtr vector1 =
          IVector::create((int*)arg1.data(),
                          arg1.shape().getElements(),
                          /* useGpu= */ DType1 != DEVICE_TYPE_CPU);
      IVectorPtr vector2 =
          IVector::create((int*)arg2.data(),
                          arg2.shape().getElements(),
                          /* useGpu= */ DType2 != DEVICE_TYPE_CPU);
      vector2->copyFrom(*vector1);
    } else {
      VectorPtr vector1 =
          Vector::create((real*)arg1.data(),
                         arg1.shape().getElements(),
                         /* useGpu= */ DType1 != DEVICE_TYPE_CPU);
      VectorPtr vector2 =
          Vector::create((real*)arg2.data(),
                         arg2.shape().getElements(),
                         /* useGpu= */ DType2 != DEVICE_TYPE_CPU);
      vector2->copyFrom(*vector1);
    }
  }
};
}  // namespace test
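
// A quick sketch of how test::CopyArgument is used (Compare2Function below
// invokes it through its copyArg_ member); `cpuArg` and `gpuArg` are
// hypothetical BufferArgs with the same value type, where gpuArg has at
// least as many elements as cpuArg:
//
//   test::CopyArgument<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> copyArg;
//   copyArg(cpuArg, gpuArg);  // copies cpuArg's contents into gpuArg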

/**
 * \brief A class for comparing two Functions of different implementations.
 *        For example, it can be used to check whether the CPU and GPU
 *        implementations of a function produce consistent results.
 *
 * Use case:
 *  // Initialize a test object; the corresponding CPU and GPU Functions
 *  // are constructed according to FunctionName and FuncConfig.
 *  CpuGpuFuncCompare test(FunctionName, FuncConfig);
 *  // Prepare the input and output arguments.
 *  // The inputs and outputs here need not contain real data;
 *  // they only describe the argument type and shape.
 *  test.addInputs(input1);
 *  test.addInputs(input2);
 *  test.addOutputs(output1);
 *  test.addOutputs(output2);
 *  // Run.
 *  // Based on the type and shape of the arguments (inputs_/outputs_),
 *  // run() automatically initializes the arguments required by the cpu
 *  // and gpu functions (cpuInputs_/cpuOutputs_/gpuInputs_/gpuOutputs_),
 *  // calls both Functions, and compares the CPU and GPU calculation
 *  // results for consistency.
 *  test.run();
 */
template <DeviceType DType1, DeviceType DType2>
class Compare2Function {
public:
  typedef typename test::Allocator<DType1>::type Allocator1;
  typedef typename test::Allocator<DType2>::type Allocator2;
  typedef typename Tensor<real, DType1>::Vector Vector1;
  typedef typename Tensor<real, DType2>::Vector Vector2;
  typedef typename Tensor<real, DType1>::SparseMatrix SparseMatrix1;
  typedef typename Tensor<real, DType2>::SparseMatrix SparseMatrix2;

  Compare2Function(const std::string& name1,
                   const std::string& name2,
                   const FuncConfig& config)
      : function1_(FunctionBase::funcRegistrar_.createByType(name1)),
        function2_(FunctionBase::funcRegistrar_.createByType(name2)) {
    function1_->init(config);
    function2_->init(config);
    initArgsCallBack_ = nullptr;
  }

  ~Compare2Function() {}

  // The input only needs to contain the shape; it does not contain real data.
  void addInputs(const BufferArg& input) {
    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));

    func1Inputs_.emplace_back(std::make_shared<BufferArg>(
        func1Memory_.back()->getBuf(), input.valueType(), input.shape()));
    func2Inputs_.emplace_back(std::make_shared<BufferArg>(
        func2Memory_.back()->getBuf(), input.valueType(), input.shape()));
  }

  // Assume one copy of the sequence information is shared by different
  // SequenceArgs.
  void addSequence(const SequenceIdArg& input) {
    CHECK_EQ(input.shape().ndims(), 1UL);
    size_t batchSize = input.shape()[0];
    size_t numSeqs = batchSize / 10 + 1;
    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
    func1Memory_.emplace_back(std::make_shared<Allocator1>(sizeId));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(sizeId));
    seq1_ = std::make_shared<SequenceIdArg>(func1Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
    seq2_ = std::make_shared<SequenceIdArg>(func2Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
    /// init sequence Id
    initArg(*seq1_, batchSize);

    copyArg_(*seq1_, *seq2_);
  }

  void addInputs(const SequenceArg& input) {
    CHECK_EQ(input.shape().ndims(), 2UL);
    size_t batchSize = input.shape()[0];
    if (!seq1_ || !seq2_) {  // sequence does not exist yet
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }

    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));

    /// SequenceArg
    func1Inputs_.emplace_back(
        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
                                      input.valueType(),
                                      input.shape(),
                                      *seq1_));
    func2Inputs_.emplace_back(
        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
                                      input.valueType(),
                                      input.shape(),
                                      *seq2_));
  }

  void registerInitCallBack(std::function<void(BufferArg&, size_t)> callback) {
    initArgsCallBack_ = callback;
  }
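
  // A sketch of how the callback can be used (the lambda below is
  // hypothetical): initInputs() invokes it with each input BufferArg and its
  // index right after the default random initialization, so a test can
  // overwrite selected inputs with custom data:
  //
  //   test.registerInitCallBack([](BufferArg& arg, size_t index) {
  //     if (index == 0) {  // fill the first input with zeros
  //       real* data = (real*)arg.data();
  //       std::fill(data, data + arg.shape().getElements(), (real)0);
  //     }
  //   });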

  // The output only needs to contain the shape; it does not contain real data.
  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
    size_t size =
        output.shape().getElements() * sizeOfValuType(output.valueType());
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));

    func1Outputs_.emplace_back(
        std::make_shared<BufferArg>(func1Memory_.back()->getBuf(),
                                    output.valueType(),
                                    output.shape(),
                                    argType));
    func2Outputs_.emplace_back(
        std::make_shared<BufferArg>(func2Memory_.back()->getBuf(),
                                    output.valueType(),
                                    output.shape(),
                                    argType));
  }

  /// add and init output sparse matrix
  void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
    sparse1_ = std::make_shared<SparseMatrix1>(
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
        static_cast<SparseValueType>(output.dataType()),
        static_cast<SparseFormat>(output.dataFormat()));

    sparse2_ = std::make_shared<SparseMatrix2>(
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
        static_cast<SparseValueType>(output.dataType()),
        static_cast<SparseFormat>(output.dataFormat()));

    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
    sparse1_->randomizeUniform();
    sparse2_->copyFrom(*sparse1_, stream);
    hl_stream_synchronize(stream);

    func1Outputs_.emplace_back(
        std::make_shared<SparseMatrixArg>(*sparse1_, argType));
    func2Outputs_.emplace_back(
        std::make_shared<SparseMatrixArg>(*sparse2_, argType));
  }

  void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
    CHECK_EQ(output.shape().ndims(), 2UL);
    size_t batchSize = output.shape()[0];

    if (!seq1_ || !seq2_) {  // sequence does not exist yet
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }
    size_t size =
        output.shape().getElements() * sizeOfValuType(output.valueType());
    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));

    /// SequenceArg
    func1Outputs_.emplace_back(
        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
                                      output.valueType(),
                                      output.shape(),
                                      *seq1_,
                                      argType));
    func2Outputs_.emplace_back(
        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
                                      output.valueType(),
                                      output.shape(),
                                      *seq2_,
                                      argType));
  }

  void addInputs(const SparseMatrixArg& input) {
    sparse1_ = std::make_shared<SparseMatrix1>(
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
        static_cast<SparseValueType>(input.dataType()),
        static_cast<SparseFormat>(input.dataFormat()));

    sparse2_ = std::make_shared<SparseMatrix2>(
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
        static_cast<SparseValueType>(input.dataType()),
        static_cast<SparseFormat>(input.dataFormat()));

    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
    sparse1_->randomizeUniform();
    sparse2_->copyFrom(*sparse1_, stream);
    hl_stream_synchronize(stream);

    func1Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse1_));
    func2Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse2_));
  }

  void run() {
    // prepare cpu/gpu arguments
    initInputs();

    initOutputs();
    // calculate using both functions
    auto callFunction = [](FunctionBase* function,
                           std::vector<BufferArgPtr>& inputs,
                           std::vector<BufferArgPtr>& outputs) {
      BufferArgs inArgs;
      BufferArgs outArgs;
      for (auto arg : inputs) {
        inArgs.addArg(*arg);
      }
      for (auto arg : outputs) {
        outArgs.addArg(*arg);
      }
      function->calc(inArgs, outArgs);
    };

    callFunction(function1_.get(), func1Inputs_, func1Outputs_);
    callFunction(function2_.get(), func2Inputs_, func2Outputs_);

    // check outputs
    compareOutputs();
  }

  std::shared_ptr<FunctionBase> getFunction1() const { return function1_; }

  std::shared_ptr<FunctionBase> getFunction2() const { return function2_; }

protected:
  // Only init the first (cpu) argument; the second (gpu) argument is copied
  // from it.
  void initArg(BufferArg& arg) {
    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
    vector.uniform(0.001, 1);
  }

  void initArg(SequenceArg& arg) {
    /// init only the value matrix
    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
    vector.uniform(0.001, 1);
  }

  void initArg(SequenceIdArg& arg, size_t batchSize) {
    size_t numSeqs = arg.numSeqs();
    int* buf = reinterpret_cast<int*>(arg.data());
    int pos = 0;
    size_t maxLen = 2 * batchSize / numSeqs;
    for (int i = 0; i < (int)numSeqs; ++i) {
      int len = 1 + uniformRandom(std::min<int64_t>(
                        maxLen, batchSize - pos - numSeqs + i));
      buf[i] = pos;
      pos += len;
      VLOG(1) << " len=" << len;
    }
    buf[numSeqs] = batchSize;
  }
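
  // For illustration (sequence lengths are random in practice): with
  // batchSize = 25, addSequence() creates numSeqs = 25 / 10 + 1 = 3
  // sequences, so the id buffer holds numSeqs + 1 = 4 offsets, e.g.
  // {0, 8, 17, 25}; sequence i spans [buf[i], buf[i + 1]) and the final
  // offset is always batchSize.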

  void initInputs() {
    for (size_t i = 0; i < func1Inputs_.size(); i++) {
      if (func1Inputs_[i]->isSparseArg()) {
        continue;  /// sparse matrix is already initialized
      }

      if (func1Inputs_[i]->isSequenceArg()) {
        initArg(dynamic_cast<SequenceArg&>(*func1Inputs_[i]));
      } else {
        initArg(*func1Inputs_[i]);
      }

      if (initArgsCallBack_ != nullptr) {
        initArgsCallBack_(*func1Inputs_[i], i);
      }

      copyArg_(*func1Inputs_[i], *func2Inputs_[i]);
    }
  }

  void initOutputs() {
    for (size_t i = 0; i < func1Outputs_.size(); i++) {
      if (func1Outputs_[i]->isSparseArg()) {
        continue;  /// sparse matrix is already initialized
      }

      if (func1Outputs_[i]->isSequenceArg()) {
        initArg(dynamic_cast<SequenceArg&>(*func1Outputs_[i]));
      } else {
        initArg(*func1Outputs_[i]);
      }

      copyArg_(*func1Outputs_[i], *func2Outputs_[i]);
    }
  }

  void compareOutputs() {
    for (size_t i = 0; i < func1Outputs_.size(); i++) {
      // TODO: add a BufferCheck utility to compare the two buffers directly.
      const auto cpu = func1Outputs_[i];
      const auto gpu = func2Outputs_[i];
      CHECK_EQ(cpu->numElements(), gpu->numElements());
      Vector1 cpuVector(cpu->numElements(), (real*)cpu->data());
      Vector2 gpuVector(gpu->numElements(), (real*)gpu->data());
      autotest::TensorCheckErr(cpuVector, gpuVector);
    }
  }

protected:
  std::shared_ptr<FunctionBase> function1_;
  std::shared_ptr<FunctionBase> function2_;
  std::vector<std::shared_ptr<Allocator1>> func1Memory_;
  std::vector<std::shared_ptr<Allocator2>> func2Memory_;
  std::vector<BufferArgPtr> func1Inputs_;
  std::vector<BufferArgPtr> func1Outputs_;
  std::vector<BufferArgPtr> func2Inputs_;
  std::vector<BufferArgPtr> func2Outputs_;
  std::shared_ptr<SparseMatrix1> sparse1_;
  std::shared_ptr<SparseMatrix2> sparse2_;
  std::shared_ptr<SequenceIdArg> seq1_;
  std::shared_ptr<SequenceIdArg> seq2_;
  test::CopyArgument<DType1, DType2> copyArg_;
  std::function<void(BufferArg&, size_t)> initArgsCallBack_;
};

class CpuGpuFuncCompare
    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> {
public:
  CpuGpuFuncCompare(const std::string& name, const FuncConfig& config)
      : Compare2Function(name + "-CPU", name + "-GPU", config) {}

  ~CpuGpuFuncCompare() {}
};
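
// A minimal usage sketch, assuming a registered Function pair named
// "ExampleFunc-CPU"/"ExampleFunc-GPU" and a "scale" config key (both
// illustrative, not real registered Functions):
//
//   CpuGpuFuncCompare test("ExampleFunc", FuncConfig().set("scale", 1.5));
//   test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));
//   test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));
//   // Randomly initializes the inputs, runs both Functions, and checks
//   // that the CPU and GPU outputs agree.
//   test.run();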

}  // namespace paddle