Commit 1879332a authored by hedaoyuan

Modify FunctionCompare to Compare2Function to support comparison of two CPU functions.

Parent 1846d9e1
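In short, the comparison harness is now templated on two device types and takes two registered function names, so any two implementations can be paired. A minimal usage sketch under that assumption follows; the function names ("MyOp-naive-CPU", "MyOp-blocked-CPU") and the tensor shapes are placeholders, not names registered by this commit:

// Sketch only: compare two hypothetical CPU implementations of the same op.
Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> test(
    "MyOp-naive-CPU", "MyOp-blocked-CPU", FuncConfig());
test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 64}));
test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 64}));
test.run();  // initializes the inputs, runs both Functions, compares the outputs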
@@ -28,7 +28,7 @@ void testMatrixProjectionForward(int context_start,
      std::max(0, (int)(context_start + context_length - 1));
  if (pad == 0) is_padding = false;
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
      "ContextProjectionForward",
      FuncConfig()
          .set("context_length", context_length)
@@ -60,7 +60,7 @@ void testMatrixProjectionBackward(int context_start,
      std::max(0, (int)(context_start + context_length - 1));
  if (pad == 0) is_padding = false;
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
      "ContextProjectionBackward",
      FuncConfig()
          .set("context_length", context_length)
......
@@ -22,7 +22,7 @@ void testCosSimForward(size_t height_x,
                       size_t height_y,
                       size_t width,
                       real scale) {
-  FunctionCompare test("CosSimForward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimForward", FuncConfig().set("scale", scale));
  // prepare input arguments
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, width}));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_y, width}));
@@ -36,7 +36,7 @@ void testCosSimBackward(size_t height_x,
                        size_t height_y,
                        size_t width,
                        real scale) {
-  FunctionCompare test("CosSimBackward", FuncConfig().set("scale", scale));
+  CpuGpuFuncCompare test("CosSimBackward", FuncConfig().set("scale", scale));
  // prepare input arguments
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{height_x, 1}));
......
@@ -28,11 +28,11 @@ TEST(CrossMapNormal, real) {
          << " size=" << size;
  // init Test object
-  FunctionCompare test("CrossMapNormal",
+  CpuGpuFuncCompare test("CrossMapNormal",
                       FuncConfig()
                           .set("size", size)
                           .set("scale", (real)1.5)
                           .set("pow", (real)0.5));
  // prepare input arguments
  TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
@@ -57,11 +57,11 @@ TEST(CrossMapNormalGrad, real) {
          << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW
          << " size=" << size;
-  FunctionCompare test("CrossMapNormalGrad",
+  CpuGpuFuncCompare test("CrossMapNormalGrad",
                       FuncConfig()
                           .set("size", size)
                           .set("scale", (real)1.5)
                           .set("pow", (real)0.5));
  TensorShape shape{numSamples, channels, imgSizeH, imgSizeW};
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, shape));
......
@@ -22,14 +22,62 @@ namespace paddle {
typedef std::shared_ptr<BufferArg> BufferArgPtr;

+namespace test {
+template <DeviceType DType>
+struct Allocator;
+
+template <>
+struct Allocator<DEVICE_TYPE_CPU> {
+  using type = CpuMemoryHandle;
+};
+
+template <>
+struct Allocator<DEVICE_TYPE_GPU> {
+  using type = GpuMemoryHandle;
+};
+
+// Copy argument1 to argument2
+template <DeviceType DType1, DeviceType DType2>
+class CopyArgument {
+public:
+  void operator()(const BufferArg& arg1, BufferArg& arg2) {
+    CHECK_EQ(arg1.valueType(), arg2.valueType());
+    CHECK_LE(arg1.shape().getElements(), arg2.shape().getElements());
+
+    if (arg1.valueType() == VALUE_TYPE_INT32) {
+      IVectorPtr vector1 =
+          IVector::create((int*)arg1.data(),
+                          arg1.shape().getElements(),
+                          DType1 == DEVICE_TYPE_CPU ? false : true);
+      IVectorPtr vector2 =
+          IVector::create((int*)arg2.data(),
+                          arg2.shape().getElements(),
+                          DType2 == DEVICE_TYPE_CPU ? false : true);
+      vector2->copyFrom(*vector1);
+    } else {
+      VectorPtr vector1 =
+          Vector::create((real*)arg1.data(),
+                         arg1.shape().getElements(),
+                         DType1 == DEVICE_TYPE_CPU ? false : true);
+      VectorPtr vector2 =
+          Vector::create((real*)arg2.data(),
+                         arg2.shape().getElements(),
+                         DType2 == DEVICE_TYPE_CPU ? false : true);
+      vector2->copyFrom(*vector1);
+    }
+  }
+};
+}  // namespace test
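For orientation, the test::Allocator trait above only maps a DeviceType tag to the matching memory-handle class at compile time; a small illustration, not part of the diff (the alias names and the 1024-byte size are made up for this note):

using CpuAlloc = test::Allocator<DEVICE_TYPE_CPU>::type;  // resolves to CpuMemoryHandle
using GpuAlloc = test::Allocator<DEVICE_TYPE_GPU>::type;  // resolves to GpuMemoryHandle
auto cpuBuf = std::make_shared<CpuAlloc>(1024);  // 1024-byte CPU buffer, as Compare2Function allocates per argument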
/**
- * \brief A class for comparing CPU and GPU implementations of Function.
- *
+ * \brief A class for comparing two Functions of different implementations.
+ *        For example, it can be used to check that the CPU and GPU
+ *        implementations of a Function produce consistent results.
 *
 * Use case:
 * // Initializes a test object, the corresponding cpu and gpu Function
 * // are constructed according to FunctionName and FuncConfig.
- * FunctionCompare test(FunctionName, FuncConfig);
+ * CpuGpuFuncCompare test(FunctionName, FuncConfig);
 * // Prepare inputs and outputs arguments.
 * // Here the input and output can not contain real data,
 * // only contains the argument type and shape.
@@ -45,28 +93,38 @@ typedef std::shared_ptr<BufferArg> BufferArgPtr;
 * // Compares CPU and GPU calculation results for consistency.
 * test.run();
 */
-class FunctionCompare {
+template <DeviceType DType1, DeviceType DType2>
+class Compare2Function {
public:
-  FunctionCompare(const std::string& name, const FuncConfig& config)
-      : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
-        gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
-    cpuFunc_->init(config);
-    gpuFunc_->init(config);
+  typedef typename test::Allocator<DType1>::type Allocator1;
+  typedef typename test::Allocator<DType2>::type Allocator2;
+  typedef typename Tensor<real, DType1>::Vector Vector1;
+  typedef typename Tensor<real, DType2>::Vector Vector2;
+  typedef typename Tensor<real, DType1>::SparseMatrix SparseMatrix1;
+  typedef typename Tensor<real, DType2>::SparseMatrix SparseMatrix2;
+
+  Compare2Function(const std::string& name1,
+                   const std::string& name2,
+                   const FuncConfig& config)
+      : function1_(FunctionBase::funcRegistrar_.createByType(name1)),
+        function2_(FunctionBase::funcRegistrar_.createByType(name2)) {
+    function1_->init(config);
+    function2_->init(config);
  }

-  ~FunctionCompare() {}
+  ~Compare2Function() {}

  // input need only contains shape, do not contains data.
  void addInputs(const BufferArg& input) {
    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
-    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
-        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
+    func1Inputs_.emplace_back(std::make_shared<BufferArg>(
+        func1Memory_.back()->getBuf(), input.valueType(), input.shape()));
+    func2Inputs_.emplace_back(std::make_shared<BufferArg>(
+        func2Memory_.back()->getBuf(), input.valueType(), input.shape()));
  }

  // assume one copy of sequence is shared by different SequenceArgs
@@ -75,62 +133,57 @@ public:
    size_t batchSize = input.shape()[0];
    size_t numSeqs = batchSize / 10 + 1;
    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
-    cpuSeq_ = std::make_shared<SequenceIdArg>(cpuMemory_.back()->getBuf(),
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(sizeId));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(sizeId));
+    seq1_ = std::make_shared<SequenceIdArg>(func1Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
-    gpuSeq_ = std::make_shared<SequenceIdArg>(gpuMemory_.back()->getBuf(),
+    seq2_ = std::make_shared<SequenceIdArg>(func2Memory_.back()->getBuf(),
                                            TensorShape{numSeqs + 1});
    /// init sequence Id
-    initArg(*cpuSeq_, batchSize);
-    // todo(tianbing), delete it
-    CHECK_EQ(cpuSeq_->shape().getElements(), cpuSeq_->numSeqs() + 1);
-    CpuIVector cpuSeq(cpuSeq_->shape().getElements(), (int*)cpuSeq_->data());
-    GpuIVector gpuSeq(gpuSeq_->shape().getElements(), (int*)gpuSeq_->data());
-    gpuSeq.copyFrom(cpuSeq);
+    initArg(*seq1_, batchSize);
+    copyArg_(*seq1_, *seq2_);
  }

  void addInputs(const SequenceArg& input) {
    CHECK_EQ(input.shape().ndims(), 2UL);
    size_t batchSize = input.shape()[0];
-    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+    if (!seq1_ || !seq2_) {  // sequence not exist
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }
    size_t size =
        input.shape().getElements() * sizeOfValuType(input.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
    /// SequenceArg
-    cpuInputs_.emplace_back(
-        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+    func1Inputs_.emplace_back(
+        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
                                      input.valueType(),
                                      input.shape(),
-                                      *cpuSeq_));
-    gpuInputs_.emplace_back(
-        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+                                      *seq1_));
+    func2Inputs_.emplace_back(
+        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
                                      input.valueType(),
                                      input.shape(),
-                                      *gpuSeq_));
+                                      *seq2_));
  }

  // output need only contains shape, do not contains data.
  void addOutputs(const BufferArg& output, ArgType argType = ASSIGN_TO) {
    size_t size =
        output.shape().getElements() * sizeOfValuType(output.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-    cpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
+    func1Outputs_.emplace_back(
+        std::make_shared<BufferArg>(func1Memory_.back()->getBuf(),
                                    output.valueType(),
                                    output.shape(),
                                    argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+    func2Outputs_.emplace_back(
+        std::make_shared<BufferArg>(func2Memory_.back()->getBuf(),
                                    output.valueType(),
                                    output.shape(),
                                    argType));
@@ -138,14 +191,14 @@ public:
  /// add and init output sparse matrix
  void addOutputs(const SparseMatrixArg& output, ArgType argType = ASSIGN_TO) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+    sparse1_ = std::make_shared<SparseMatrix1>(
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
        static_cast<SparseValueType>(output.dataType()),
        static_cast<SparseFormat>(output.dataFormat()));
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+    sparse2_ = std::make_shared<SparseMatrix2>(
        output.shape()[0],
        output.shape()[1],
        output.nnz(),
@@ -154,52 +207,52 @@ public:
    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
-    cpuSparse_->randomizeUniform();
-    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    sparse1_->randomizeUniform();
+    sparse2_->copyFrom(*sparse1_, stream);
    hl_stream_synchronize(stream);
-    cpuOutputs_.emplace_back(
-        std::make_shared<SparseMatrixArg>(*cpuSparse_, argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<SparseMatrixArg>(*gpuSparse_, argType));
+    func1Outputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*sparse1_, argType));
+    func2Outputs_.emplace_back(
+        std::make_shared<SparseMatrixArg>(*sparse2_, argType));
  }

  void addOutputs(const SequenceArg& output, ArgType argType = ASSIGN_TO) {
    CHECK_EQ(output.shape().ndims(), 2UL);
    size_t batchSize = output.shape()[0];
-    if (!cpuSeq_ || !gpuSeq_) {  // sequence not exist
+    if (!seq1_ || !seq2_) {  // sequence not exist
      addSequence(SequenceIdArg(TensorShape{batchSize}));
    }
    size_t size =
        output.shape().getElements() * sizeOfValuType(output.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+    func1Memory_.emplace_back(std::make_shared<Allocator1>(size));
+    func2Memory_.emplace_back(std::make_shared<Allocator2>(size));
    /// SequenceArg
-    cpuOutputs_.emplace_back(
-        std::make_shared<SequenceArg>(cpuMemory_.back()->getBuf(),
+    func1Outputs_.emplace_back(
+        std::make_shared<SequenceArg>(func1Memory_.back()->getBuf(),
                                      output.valueType(),
                                      output.shape(),
-                                      *cpuSeq_,
+                                      *seq1_,
                                      argType));
-    gpuOutputs_.emplace_back(
-        std::make_shared<SequenceArg>(gpuMemory_.back()->getBuf(),
+    func2Outputs_.emplace_back(
+        std::make_shared<SequenceArg>(func2Memory_.back()->getBuf(),
                                      output.valueType(),
                                      output.shape(),
-                                      *gpuSeq_,
+                                      *seq2_,
                                      argType));
  }

  void addInputs(const SparseMatrixArg& input) {
-    cpuSparse_ = std::make_shared<CpuSparseMatrix>(
+    sparse1_ = std::make_shared<SparseMatrix1>(
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
        static_cast<SparseValueType>(input.dataType()),
        static_cast<SparseFormat>(input.dataFormat()));
-    gpuSparse_ = std::make_shared<GpuSparseMatrix>(
+    sparse2_ = std::make_shared<SparseMatrix2>(
        input.shape()[0],
        input.shape()[1],
        input.nnz(),
@@ -208,12 +261,12 @@ public:
    /// init sparse matrix
    hl_stream_t stream(HPPL_STREAM_1);
-    cpuSparse_->randomizeUniform();
-    gpuSparse_->copyFrom(*cpuSparse_, stream);
+    sparse1_->randomizeUniform();
+    sparse2_->copyFrom(*sparse1_, stream);
    hl_stream_synchronize(stream);
-    cpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*cpuSparse_));
-    gpuInputs_.emplace_back(std::make_shared<SparseMatrixArg>(*gpuSparse_));
+    func1Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse1_));
+    func2Inputs_.emplace_back(std::make_shared<SparseMatrixArg>(*sparse2_));
  }

  void run() {
@@ -236,27 +289,27 @@ public:
      function->calc(inArgs, outArgs);
    };
-    callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
-    callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
+    callFunction(function1_.get(), func1Inputs_, func1Outputs_);
+    callFunction(function2_.get(), func2Inputs_, func2Outputs_);
    // check outputs
    compareOutputs();
  }

-  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
+  std::shared_ptr<FunctionBase> getCpuFunction() const { return function1_; }
-  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return function2_; }

protected:
  // only init cpu argument, gpu argument copy from cpu argument.
  void initArg(BufferArg& arg) {
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
    vector.uniform(0.001, 1);
  }

  void initArg(SequenceArg& arg) {
    /// init only matrix
-    CpuVector vector(arg.shape().getElements(), (real*)arg.data());
+    Vector1 vector(arg.shape().getElements(), (real*)arg.data());
    vector.uniform(0.001, 1);
  }
@@ -276,73 +329,72 @@ protected:
  }

  void initInputs() {
-    for (size_t i = 0; i < cpuInputs_.size(); i++) {
-      if (cpuInputs_[i]->isSparseArg()) {
+    for (size_t i = 0; i < func1Inputs_.size(); i++) {
+      if (func1Inputs_[i]->isSparseArg()) {
        continue;  /// sparse matrix already init
      }
-      if (cpuInputs_[i]->isSequenceArg()) {
-        initArg(dynamic_cast<SequenceArg&>(*cpuInputs_[i]));
+      if (func1Inputs_[i]->isSequenceArg()) {
+        initArg(dynamic_cast<SequenceArg&>(*func1Inputs_[i]));
      } else {
-        initArg(*cpuInputs_[i]);
+        initArg(*func1Inputs_[i]);
      }
-      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
-      CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
-                          (real*)cpuInputs_[i]->data());
-      GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
-                          (real*)gpuInputs_[i]->data());
-      gpuVector.copyFrom(cpuVector);
+      copyArg_(*func1Inputs_[i], *func2Inputs_[i]);
    }
  }

  void initOutputs() {
-    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
-      if (cpuOutputs_[i]->isSparseArg()) {
+    for (size_t i = 0; i < func1Outputs_.size(); i++) {
+      if (func1Outputs_[i]->isSparseArg()) {
        continue;  /// sparse matrix already init
      }
-      if (cpuOutputs_[i]->isSequenceArg()) {
-        initArg(dynamic_cast<SequenceArg&>(*cpuOutputs_[i]));
+      if (func1Outputs_[i]->isSequenceArg()) {
        initArg(dynamic_cast<SequenceArg&>(*func1Outputs_[i]));
      } else {
-        initArg(*cpuOutputs_[i]);
+        initArg(*func1Outputs_[i]);
      }
-      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
-      CpuVector cpuVector(cpuOutputs_[i]->shape().getElements(),
-                          (real*)cpuOutputs_[i]->data());
-      GpuVector gpuVector(gpuOutputs_[i]->shape().getElements(),
-                          (real*)gpuOutputs_[i]->data());
-      gpuVector.copyFrom(cpuVector);
+      copyArg_(*func1Outputs_[i], *func2Outputs_[i]);
    }
  }

  void compareOutputs() {
-    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+    for (size_t i = 0; i < func1Outputs_.size(); i++) {
      // TODO, Need a BufferCheck used to compare the two buffers.
-      const auto cpu = cpuOutputs_[i];
-      const auto gpu = gpuOutputs_[i];
+      const auto cpu = func1Outputs_[i];
+      const auto gpu = func2Outputs_[i];
      CHECK_EQ(cpu->numElements(), gpu->numElements());
-      CpuVector cpuVector(cpu->numElements(), (real*)cpu->data());
-      GpuVector gpuVector(gpu->numElements(), (real*)gpu->data());
+      Vector1 cpuVector(cpu->numElements(), (real*)cpu->data());
+      Vector2 gpuVector(gpu->numElements(), (real*)gpu->data());
      autotest::TensorCheckErr(cpuVector, gpuVector);
    }
  }

protected:
-  std::shared_ptr<FunctionBase> cpuFunc_;
-  std::shared_ptr<FunctionBase> gpuFunc_;
-  std::vector<CpuMemHandlePtr> cpuMemory_;
-  std::vector<GpuMemHandlePtr> gpuMemory_;
-  std::vector<BufferArgPtr> cpuInputs_;
-  std::vector<BufferArgPtr> cpuOutputs_;
-  std::vector<BufferArgPtr> gpuInputs_;
-  std::vector<BufferArgPtr> gpuOutputs_;
-  std::shared_ptr<CpuSparseMatrix> cpuSparse_;
-  std::shared_ptr<GpuSparseMatrix> gpuSparse_;
-  std::shared_ptr<SequenceIdArg> cpuSeq_;
-  std::shared_ptr<SequenceIdArg> gpuSeq_;
+  std::shared_ptr<FunctionBase> function1_;
+  std::shared_ptr<FunctionBase> function2_;
+  std::vector<std::shared_ptr<Allocator1>> func1Memory_;
+  std::vector<std::shared_ptr<Allocator2>> func2Memory_;
+  std::vector<BufferArgPtr> func1Inputs_;
+  std::vector<BufferArgPtr> func1Outputs_;
+  std::vector<BufferArgPtr> func2Inputs_;
+  std::vector<BufferArgPtr> func2Outputs_;
+  std::shared_ptr<SparseMatrix1> sparse1_;
+  std::shared_ptr<SparseMatrix2> sparse2_;
+  std::shared_ptr<SequenceIdArg> seq1_;
+  std::shared_ptr<SequenceIdArg> seq2_;
+  test::CopyArgument<DType1, DType2> copyArg_;
+};
+
+class CpuGpuFuncCompare
+    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_GPU> {
+public:
+  CpuGpuFuncCompare(const std::string& name, const FuncConfig& config)
+      : Compare2Function(name + "-CPU", name + "-GPU", config) {}
+
+  ~CpuGpuFuncCompare() {}
};

}  // namespace paddle
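Given the template above, comparing two CPU implementations, which is the stated motivation of this commit, could be wrapped the same way CpuGpuFuncCompare is. The wrapper below is not part of the diff, only a sketch of what the change enables; the class name and the two-name constructor are assumptions:

// Hypothetical convenience wrapper, mirroring CpuGpuFuncCompare above, for
// comparing two CPU Functions registered under different type names.
class CpuCpuFuncCompare
    : public Compare2Function<DEVICE_TYPE_CPU, DEVICE_TYPE_CPU> {
public:
  // name1/name2 are the registered type names of the two CPU implementations.
  CpuCpuFuncCompare(const std::string& name1,
                    const std::string& name2,
                    const FuncConfig& config)
      : Compare2Function(name1, name2, config) {}
};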
@@ -35,7 +35,7 @@ void testFuncDDDMatrix(
  size_t heightC = dimM;
  size_t widthC = dimN;
  // init Test object
-  FunctionCompare test(
+  CpuGpuFuncCompare test(
      "MulOp", FuncConfig().set("aTrans", transa).set("bTrans", transb));
  // prepare input arguments
  /// matrix A : HA * WA
@@ -81,8 +81,8 @@ void testFuncDSparseDMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
  // init Test object
-  FunctionCompare test("MulOp",
-                       FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
  // prepare input arguments
  /// sparse matrix A : M * K
  test.addInputs(SparseMatrixArg(
@@ -126,8 +126,8 @@ void testFuncDDSparseMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
  // init Test object
-  FunctionCompare test("MulOp",
-                       FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
  // prepare input arguments
  /// matrix A : M * K
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
@@ -172,8 +172,8 @@ void testFuncSparseDDMatrix(
    size_t dimM, size_t dimN, size_t dimK, size_t nnz, SparseFormat FORMAT) {
  real scaleT = 1.0;
  // init Test object
-  FunctionCompare test("MulOp",
-                       FuncConfig().set("aTrans", false).set("bTrans", false));
+  CpuGpuFuncCompare test(
+      "MulOp", FuncConfig().set("aTrans", false).set("bTrans", false));
  // prepare input arguments
  /// matrix A : M * K
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{dimM, dimK}));
......
@@ -25,7 +25,7 @@ TEST(Pad, real) {
          VLOG(3) << " numSamples=" << numSamples << " channels=" << channels
                  << " imgSizeH=" << imgSizeH << " imgSizeW=" << imgSizeW;
          for (bool test_grad : {false, true}) {
-            FunctionCompare compare(
+            CpuGpuFuncCompare compare(
                test_grad ? "PadGrad" : "Pad",
                FuncConfig()
                    .set<std::vector<uint32_t>>("channel", {2, 3})
......