Commit 67817433 authored by hedaoyuan

Implement the FunctionTest

Parent 039c0bf2
@@ -75,8 +75,17 @@ public:
   // Tensor can be Matrix, Vector, IVector.
   // For inputs, do not need argType.
   // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
-  template <typename Tensor>
-  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
     _args_.push_back(new BufferArg(arg, argType));
     addArg(*_args_.back());
   }
......
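The hunk above replaces the single addArg template with concrete Matrix/Vector/IVector overloads. As a rough usage sketch, not part of this commit, a caller collects arguments and invokes a registered Function roughly as follows; the function name "SomeFunc", the shapes, and the default FuncConfig are placeholders for illustration only:

  // Illustrative sketch only: how the new addArg overloads are meant to be used.
  // "SomeFunc" is a placeholder for any registered Function type.
  CpuMatrix input(128, 32);    // assume the data is filled elsewhere
  CpuMatrix output(128, 32);

  BufferArgs inArgs;
  BufferArgs outArgs;
  inArgs.addArg(input);                // inputs need no ArgType
  outArgs.addArg(output, ASSIGN_TO);   // outputs must pass ASSIGN_TO or ADD_TO

  std::shared_ptr<FunctionBase> func(
      FunctionBase::funcRegistrar_.createByType("SomeFunc-CPU"));
  func->init(FuncConfig());
  func->calc(inArgs, outArgs);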
@@ -19,6 +19,8 @@ limitations under the License. */

 namespace paddle {

+typedef std::shared_ptr<BufferArg> BufferArgPtr;
+
 /**
  * \brief A class for comparing CPU and GPU implementations of Function.
  *
@@ -45,143 +47,121 @@ namespace paddle {
 class FunctionCompare {
 public:
   FunctionCompare(const std::string& name, const FuncConfig& config)
-      : cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
-        gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
-    cpu->init(config);
-    gpu->init(config);
+      : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
+        gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
+    cpuFunc_->init(config);
+    gpuFunc_->init(config);
   }

   ~FunctionCompare() {}

-  void addInputs(const BufferArg& input) { inputs.push_back(input); }
+  // input need only contains shape, do not contains data.
+  void addInputs(const BufferArg& input) {
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+  }

-  void addOutputs(const BufferArg& output) { outputs.push_back(output); }
+  // output need only contains shape, do not contains data.
+  void addOutputs(const BufferArg& output) {
+    size_t size =
+        output.shape().getElements() * sizeOfValuType(output.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+    gpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+  }
+
+  void addInputs(const SequenceArg& input) {
+    size_t batchSize = input.shape()[0];
+    size_t numSeqs = batchSize / 10 + 1;
+
+    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
+
+    TensorShape seqsId({numSeqs + 1});
+    // void* cpuBuffer = cpuMemory_.back()->getBuf();
+    // void* gpuBuffer = gpuMemory_.back()->getBuf();
+
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    // TODO: need be implemented.
+  }

   void run() {
     // prepare cpu/gpu arguments
-    prepareArgs();
+    initInputs();

     // function calculate
-    cpu->calc(cpuInputs, cpuOutputs);
-    gpu->calc(gpuInputs, gpuOutputs);
-
-    // check outputs and inouts
-    auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) {
-      for (size_t i = 0; i < cpuArgs.size(); i++) {
-        auto cpu = cpuArgs[i];
-        auto gpu = gpuArgs[i];
-        CpuVector cpuVector(cpu.shape().getElements(), (real*)cpu.getData());
-        GpuVector gpuVector(cpu.shape().getElements(), (real*)gpu.getData());
-
-        autotest::TensorCheckErr(cpuVector, gpuVector);
-      }
-    };
-    checkArgs(cpuOutputs, gpuOutputs);
-  }
-
-#if 0
-  void cmpWithArg(const Arguments& inputs,
-                  const Arguments& outputs,
-                  const Arguments& inouts) {
-    // init cpu and gpu arguments
-    auto initArgs = [=](
-        Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
-      for (const auto arg : inArgs) {
-        size_t size = sizeof(real);
-        for (const auto dim : arg.dims_) {
-          size *= dim;
-        }
-        if (arg.getData()) {
-          // todo(tianbing), waste unnecessary mem here
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          // already init outside
-        } else {
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(
-              Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
-          gpuArgs.emplace_back(
-              Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
-          // will use an api to refactor this code.
-          CpuVector cpuVector(size / sizeof(real),
-                              (real*)cpuArgs.back().getData());
-          GpuVector gpuVector(size / sizeof(real),
-                              (real*)gpuArgs.back().getData());
-          cpuVector.uniform(0.001, 1);
-          gpuVector.copyFrom(cpuVector);
-        }
-      }
-    };
-    initArgs(cpuInputs, gpuInputs, inputs);
-    initArgs(cpuOutputs, gpuOutputs, outputs);
-
-    // function calculate
-    cpu->calc(cpuInputs, cpuOutputs);
-    gpu->calc(gpuInputs, gpuOutputs);
-
-    // check outputs and inouts
-    auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
-      for (size_t i = 0; i < cpuArgs.size(); i++) {
-        auto cpu = cpuArgs[i];
-        auto gpu = gpuArgs[i];
-        size_t size = 1;
-        for (auto dim : cpu.dims_) {
-          size *= dim;
-        }
-        CpuVector cpuVector(size, (real*)cpu.getData());
-        GpuVector gpuVector(size, (real*)gpu.getData());
-
-        autotest::TensorCheckErr(cpuVector, gpuVector);
-      }
-    };
-    checkArgs(cpuOutputs, gpuOutputs);
-  }
-#endif
+    auto callFunction = [](FunctionBase* function,
+                           std::vector<BufferArgPtr>& inputs,
+                           std::vector<BufferArgPtr>& outputs) {
+      BufferArgs inArgs;
+      BufferArgs outArgs;
+      for (auto arg : inputs) {
+        inArgs.addArg(*arg);
+      }
+      for (auto arg : outputs) {
+        outArgs.addArg(*arg);
+      }
+      function->calc(inArgs, outArgs);
+    };
+
+    callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
+    callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
+
+    // check outputs and inouts
+    compareOutputs();
+  }

-  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
+  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }

-  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }

 protected:
-  void prepareArgs() {
-    // TODO, if inputs has data
-  }
-
-  void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, BufferArg& arg) {
-    size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    cpuArgs.emplace_back(
-        BufferArg(cpuMemory_.back()->getBuf()), arg.valueType(), arg.shape());
-    gpuArgs.emplace_back(
-        BufferArg(gpuMemory_.back()->getBuf()), arg.valueType(), arg.shape());
-  }
-
-  void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, SequenceArg& arg) {
-    size_t batchSize = arg.shape()[0];
-    size_t numSeqs = batchSize / 10 + 1;
-
-    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    TensorShape seqsId({numSeqs + 1});
-    void* cpuBuffer = cpuMemory_.back()->getBuf();
-    void* gpuBuffer = gpuMemory_.back()->getBuf();
-
-    size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    cpuArgs.emplace_back(SequenceArg(cpuMemory_.back()->getBuf(),
-                                     arg.valueType(),
-                                     arg.shape(),
-                                     SequenceIdArg(cpuBuffer, seqsId)));
-    gpuArgs.emplace_back(SequenceArg(gpuMemory_.back()->getBuf(),
-                                     arg.valueType(),
-                                     arg.shape(),
-                                     SequenceIdArg(gpuBuffer, seqsId)));
-  }
+  void initInputs() {
+    for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      initArg(*cpuInputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
+                          (real*)cpuInputs_[i]->data());
+      GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
+                          (real*)gpuInputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
+  void compareOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      // TODO, Need a BufferCheck used to compare the two buffers.
+      auto cpu = cpuOutputs_[i];
+      auto gpu = gpuOutputs_[i];
+      CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
+      GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
+
+      autotest::TensorCheckErr(cpuVector, gpuVector);
+    }
+  }

   // only init cpu argument, gpu argument copy from cpu argument.
@@ -192,10 +172,10 @@ protected:

   void initArg(SequenceIdArg& arg, size_t batchSize) {
     size_t numSeqs = arg.numSeqs();
-    int* buf = arg.data();
+    int* buf = (int*)arg.data();
     int pos = 0;
     size_t maxLen = 2 * batchSize / numSeqs;
-    for (int i = 0; i < numSeqs; ++i) {
+    for (int i = 0; i < (int)numSeqs; ++i) {
       int len = uniformRandom(
                     std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
                 1;
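For context, and not part of the diff itself: the buffer that initArg(SequenceIdArg&) fills is a list of numSeqs + 1 cumulative row offsets into the batch. A tiny sketch of that layout, with made-up values, assuming the SequenceIdArg(void*, TensorShape) constructor shown in the removed createArg code above:

  // Illustrative sketch only: the kind of sequence-id buffer initArg()
  // produces. With batchSize = 10 and numSeqs = 2, the buffer holds
  // numSeqs + 1 offsets, starting at 0 and ending at batchSize.
  std::vector<int> seqStart = {0, 4, 10};  // seq 0 = rows [0,4), seq 1 = rows [4,10)
  TensorShape seqsId({seqStart.size()});
  SequenceIdArg seqIdArg(seqStart.data(), seqsId);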
@@ -207,17 +187,14 @@ protected:
   }

 protected:
-  std::shared_ptr<FunctionBase> cpu;
-  std::shared_ptr<FunctionBase> gpu;
+  std::shared_ptr<FunctionBase> cpuFunc_;
+  std::shared_ptr<FunctionBase> gpuFunc_;
   std::vector<CpuMemHandlePtr> cpuMemory_;
   std::vector<GpuMemHandlePtr> gpuMemory_;
-  // inputs and outputs
-  BufferArgs inputs;
-  BufferArgs outputs;
-  BufferArgs cpuInputs_;
-  BufferArgs cpuOutputs_;
-  BufferArgs gpuInputs_;
-  BufferArgs gpuOutputs_;
+  std::vector<BufferArgPtr> cpuInputs_;
+  std::vector<BufferArgPtr> cpuOutputs_;
+  std::vector<BufferArgPtr> gpuInputs_;
+  std::vector<BufferArgPtr> gpuOutputs_;
 };

 }  // namespace paddle
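A minimal sketch of how a test might drive the new FunctionCompare, outside this diff. The function name "SomeFunc", the "scale" config key, and the tensor shapes are placeholders, and the shape-only BufferArg(ValueType, TensorShape) construction is assumed from the "input need only contains shape" comments above:

  // Hypothetical gtest case built on FunctionCompare (in a test file that
  // includes gtest and FunctionTest.h); names and shapes are illustrative.
  TEST(SomeFunc, CpuGpuCompare) {
    FunctionCompare compare("SomeFunc", FuncConfig().set("scale", 1.5));

    // Register shape-only arguments; FunctionCompare allocates matching CPU
    // and GPU buffers, randomly initializes the inputs on the CPU, copies
    // them to the GPU, runs both kernels, and compares the outputs.
    compare.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));
    compare.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));
    compare.run();
  }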