Commit 67817433 authored by H hedaoyuan

Implement the FunctionTest

Parent 039c0bf2
@@ -75,8 +75,17 @@ public:
   // Tensor can be Matrix, Vector, IVector.
   // For inputs, do not need argType.
   // For outputs, the argType needs to be specified as ASSIGN_TO or ADD_TO.
-  template <typename Tensor>
-  void addArg(const Tensor& arg, ArgType argType = UNSPECIFIED) {
+  void addArg(const Matrix& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const Vector& arg, ArgType argType = UNSPECIFIED) {
+    _args_.push_back(new BufferArg(arg, argType));
+    addArg(*_args_.back());
+  }
+
+  void addArg(const IVector& arg, ArgType argType = UNSPECIFIED) {
     _args_.push_back(new BufferArg(arg, argType));
     addArg(*_args_.back());
   }
......
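A usage note, not part of the diff above: the template addArg is replaced by concrete overloads for Matrix, Vector, and IVector, so callers pass those tensor types directly when assembling arguments for a Function. The sketch below shows one such call under stated assumptions: the owning class (written here as BufferArgs), the include paths, the "MyFunction-CPU" registration name, and the matrix shapes are illustrative rather than taken from this commit.

```cpp
#include <memory>

#include "Function.h"            // assumed header for BufferArgs/FunctionBase
#include "paddle/math/Matrix.h"  // assumed header for CpuMatrix

namespace paddle {

void exampleFunctionCall() {
  // Concrete Matrix arguments; the Matrix overload of addArg wraps each one
  // in a BufferArg internally.
  CpuMatrix input(128, 32);
  CpuMatrix output(128, 32);

  BufferArgs inputs;
  BufferArgs outputs;
  inputs.addArg(input);               // inputs need no ArgType
  outputs.addArg(output, ASSIGN_TO);  // outputs must say ASSIGN_TO or ADD_TO

  // "MyFunction-CPU" is a placeholder for any registered CPU Function.
  std::shared_ptr<FunctionBase> func(
      FunctionBase::funcRegistrar_.createByType("MyFunction-CPU"));
  func->init(FuncConfig());
  func->calc(inputs, outputs);
}

}  // namespace paddle
```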
@@ -19,6 +19,8 @@ limitations under the License. */

 namespace paddle {

+typedef std::shared_ptr<BufferArg> BufferArgPtr;
+
 /**
  * \brief A class for comparing CPU and GPU implementations of Function.
  *
@@ -45,143 +47,121 @@ namespace paddle {
 class FunctionCompare {
 public:
   FunctionCompare(const std::string& name, const FuncConfig& config)
-      : cpu(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
-        gpu(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
-    cpu->init(config);
-    gpu->init(config);
+      : cpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-CPU")),
+        gpuFunc_(FunctionBase::funcRegistrar_.createByType(name + "-GPU")) {
+    cpuFunc_->init(config);
+    gpuFunc_->init(config);
   }

-  void addInputs(const BufferArg& input) { inputs.push_back(input); }
-
-  void addOutputs(const BufferArg& output) { outputs.push_back(output); }
-
-  void run() {
-    // prepare cpu/gpu arguments
-    prepareArgs();
-
-    // function calculate
-    cpu->calc(cpuInputs, cpuOutputs);
-    gpu->calc(gpuInputs, gpuOutputs);
-
-    // check outputs and inouts
-    auto checkArgs = [=](const BufferArgs& cpuArgs, const BufferArgs& gpuArgs) {
-      for (size_t i = 0; i < cpuArgs.size(); i++) {
-        auto cpu = cpuArgs[i];
-        auto gpu = gpuArgs[i];
-        CpuVector cpuVector(cpu.shape().getElements(), (real*)cpu.getData());
-        GpuVector gpuVector(cpu.shape().getElements(), (real*)gpu.getData());
-
-        autotest::TensorCheckErr(cpuVector, gpuVector);
-      }
-    };
-    checkArgs(cpuOutputs, gpuOutputs);
-  }
-#if 0
-  void cmpWithArg(const Arguments& inputs,
-                  const Arguments& outputs,
-                  const Arguments& inouts) {
-    // init cpu and gpu arguments
-    auto initArgs = [=](
-        Arguments& cpuArgs, Arguments& gpuArgs, const Arguments& inArgs) {
-      for (const auto arg : inArgs) {
-        size_t size = sizeof(real);
-        for (const auto dim : arg.dims_) {
-          size *= dim;
-        }
-        if (arg.getData()) {
-          // todo(tianbing), waste unnecessary mem here
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          gpuArgs.emplace_back(Tensor((real*)arg.getData(), arg.dims_));
-          // already init outside
-        } else {
-          cpuMemory.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-          gpuMemory.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-          cpuArgs.emplace_back(
-              Tensor((real*)cpuMemory.back()->getBuf(), arg.dims_));
-          gpuArgs.emplace_back(
-              Tensor((real*)gpuMemory.back()->getBuf(), arg.dims_));
-          // will use an api to refactor this code.
-          CpuVector cpuVector(size / sizeof(real),
-                              (real*)cpuArgs.back().getData());
-          GpuVector gpuVector(size / sizeof(real),
-                              (real*)gpuArgs.back().getData());
-          cpuVector.uniform(0.001, 1);
-          gpuVector.copyFrom(cpuVector);
-        }
-      }
-    };
-    initArgs(cpuInputs, gpuInputs, inputs);
-    initArgs(cpuOutputs, gpuOutputs, outputs);
-
-    // function calculate
-    cpu->calc(cpuInputs, cpuOutputs);
-    gpu->calc(gpuInputs, gpuOutputs);
-
-    // check outputs and inouts
-    auto checkArgs = [=](const Arguments& cpuArgs, const Arguments& gpuArgs) {
-      for (size_t i = 0; i < cpuArgs.size(); i++) {
-        auto cpu = cpuArgs[i];
-        auto gpu = gpuArgs[i];
-        size_t size = 1;
-        for (auto dim : cpu.dims_) {
-          size *= dim;
-        }
-        CpuVector cpuVector(size, (real*)cpu.getData());
-        GpuVector gpuVector(size, (real*)gpu.getData());
-
-        autotest::TensorCheckErr(cpuVector, gpuVector);
-      }
-    };
-    checkArgs(cpuOutputs, gpuOutputs);
-  }
-#endif
-
-  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpu; }
-
-  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpu; }
+  ~FunctionCompare() {}
+
+  // input need only contains shape, do not contains data.
+  void addInputs(const BufferArg& input) {
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        cpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+    gpuInputs_.emplace_back(std::make_shared<BufferArg>(
+        gpuMemory_.back()->getBuf(), input.valueType(), input.shape()));
+  }
+
+  // output need only contains shape, do not contains data.
+  void addOutputs(const BufferArg& output) {
+    size_t size =
+        output.shape().getElements() * sizeOfValuType(output.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    cpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(cpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+    gpuOutputs_.emplace_back(
+        std::make_shared<BufferArg>(gpuMemory_.back()->getBuf(),
+                                    output.valueType(),
+                                    output.shape(),
+                                    ASSIGN_TO));
+  }
+
+  void addInputs(const SequenceArg& input) {
+    size_t batchSize = input.shape()[0];
+    size_t numSeqs = batchSize / 10 + 1;
+
+    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(sizeId));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(sizeId));
+
+    TensorShape seqsId({numSeqs + 1});
+    // void* cpuBuffer = cpuMemory_.back()->getBuf();
+    // void* gpuBuffer = gpuMemory_.back()->getBuf();
+
+    size_t size =
+        input.shape().getElements() * sizeOfValuType(input.valueType());
+    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
+    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
+
+    // TODO: need be implemented.
+  }
+
+  void run() {
+    // prepare cpu/gpu arguments
+    initInputs();
+
+    // function calculate
+    auto callFunction = [](FunctionBase* function,
+                           std::vector<BufferArgPtr>& inputs,
+                           std::vector<BufferArgPtr>& outputs) {
+      BufferArgs inArgs;
+      BufferArgs outArgs;
+      for (auto arg : inputs) {
+        inArgs.addArg(*arg);
+      }
+      for (auto arg : outputs) {
+        outArgs.addArg(*arg);
+      }
+      function->calc(inArgs, outArgs);
+    };
+
+    callFunction(cpuFunc_.get(), cpuInputs_, cpuOutputs_);
+    callFunction(gpuFunc_.get(), gpuInputs_, gpuOutputs_);
+
+    // check outputs and inouts
+    compareOutputs();
+  }
+
+  std::shared_ptr<FunctionBase> getCpuFunction() const { return cpuFunc_; }
+
+  std::shared_ptr<FunctionBase> getGpuFunction() const { return gpuFunc_; }

 protected:
-  void prepareArgs() {
-    // TODO, if inputs has data
-  }
-
-  void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, BufferArg& arg) {
-    size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    cpuArgs.emplace_back(
-        BufferArg(cpuMemory_.back()->getBuf()), arg.valueType(), arg.shape());
-    gpuArgs.emplace_back(
-        BufferArg(gpuMemory_.back()->getBuf()), arg.valueType(), arg.shape());
-  }
-
-  void createArg(BufferArgs& cpuArgs, BufferArgs& gpuArgs, SequenceArg& arg) {
-    size_t batchSize = arg.shape()[0];
-    size_t numSeqs = batchSize / 10 + 1;
-
-    size_t sizeId = (numSeqs + 1) * sizeOfValuType(VALUE_TYPE_INT32);
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    TensorShape seqsId({numSeqs + 1});
-    void* cpuBuffer = cpuMemory_.back()->getBuf();
-    void* gpuBuffer = gpuMemory_.back()->getBuf();
-
-    size_t size = arg.shape().getElements() * sizeOfValuType(arg.valueType());
-    cpuMemory_.emplace_back(std::make_shared<CpuMemoryHandle>(size));
-    gpuMemory_.emplace_back(std::make_shared<GpuMemoryHandle>(size));
-
-    cpuArgs.emplace_back(SequenceArg(cpuMemory_.back()->getBuf(),
-                                     arg.valueType(),
-                                     arg.shape(),
-                                     SequenceIdArg(cpuBuffer, seqsId)));
-    gpuArgs.emplace_back(SequenceArg(gpuMemory_.back()->getBuf(),
-                                     arg.valueType(),
-                                     arg.shape(),
-                                     SequenceIdArg(gpuBuffer, seqsId)));
-  }
+  void initInputs() {
+    for (size_t i = 0; i < cpuInputs_.size(); i++) {
+      initArg(*cpuInputs_[i]);
+
+      // TODO: Need a BufferCopy used to copy from one BufferArg to another.
+      CpuVector cpuVector(cpuInputs_[i]->shape().getElements(),
+                          (real*)cpuInputs_[i]->data());
+      GpuVector gpuVector(gpuInputs_[i]->shape().getElements(),
+                          (real*)gpuInputs_[i]->data());
+
+      gpuVector.copyFrom(cpuVector);
+    }
+  }
+
+  void compareOutputs() {
+    for (size_t i = 0; i < cpuOutputs_.size(); i++) {
+      // TODO, Need a BufferCheck used to compare the two buffers.
+      auto cpu = cpuOutputs_[i];
+      auto gpu = gpuOutputs_[i];
+      CpuVector cpuVector(cpu->shape().getElements(), (real*)cpu->data());
+      GpuVector gpuVector(cpu->shape().getElements(), (real*)gpu->data());
+
+      autotest::TensorCheckErr(cpuVector, gpuVector);
+    }
+  }

   // only init cpu argument, gpu argument copy from cpu argument.
@@ -192,10 +172,10 @@ protected:

   void initArg(SequenceIdArg& arg, size_t batchSize) {
     size_t numSeqs = arg.numSeqs();
-    int* buf = arg.data();
+    int* buf = (int*)arg.data();
     int pos = 0;
     size_t maxLen = 2 * batchSize / numSeqs;
-    for (int i = 0; i < numSeqs; ++i) {
+    for (int i = 0; i < (int)numSeqs; ++i) {
       int len = uniformRandom(
                     std::min<int64_t>(maxLen, batchSize - pos - numSeqs + i)) +
                 1;
@@ -207,17 +187,14 @@ protected:
   }

 protected:
-  std::shared_ptr<FunctionBase> cpu;
-  std::shared_ptr<FunctionBase> gpu;
+  std::shared_ptr<FunctionBase> cpuFunc_;
+  std::shared_ptr<FunctionBase> gpuFunc_;
   std::vector<CpuMemHandlePtr> cpuMemory_;
   std::vector<GpuMemHandlePtr> gpuMemory_;
-  // inputs and outputs
-  BufferArgs inputs;
-  BufferArgs outputs;
-  BufferArgs cpuInputs_;
-  BufferArgs cpuOutputs_;
-  BufferArgs gpuInputs_;
-  BufferArgs gpuOutputs_;
+  std::vector<BufferArgPtr> cpuInputs_;
+  std::vector<BufferArgPtr> cpuOutputs_;
+  std::vector<BufferArgPtr> gpuInputs_;
+  std::vector<BufferArgPtr> gpuOutputs_;
 };

 }  // namespace paddle
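For orientation, not part of the diff: a sketch of how a test might drive the new FunctionCompare. It is built from a Function name registered with both "-CPU" and "-GPU" suffixes, is given input and output BufferArgs that carry only value type and shape, and run() then fills the CPU inputs, copies them to the GPU, calls both Functions, and compares the outputs via autotest::TensorCheckErr. The gtest wrapper, the "MyFunction" name, the shapes, and the shape-only BufferArg constructor are assumptions here.

```cpp
#include <gtest/gtest.h>

#include "FunctionTest.h"  // assumed header providing FunctionCompare

namespace paddle {

TEST(Function, cpuGpuCompare) {
  // "MyFunction" stands in for any Function registered as both
  // "MyFunction-CPU" and "MyFunction-GPU".
  FunctionCompare test("MyFunction", FuncConfig());

  // Only value type and shape are given; FunctionCompare allocates the
  // CPU/GPU buffers itself (see addInputs/addOutputs in the diff).
  test.addInputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));
  test.addOutputs(BufferArg(VALUE_TYPE_FLOAT, TensorShape{128, 32}));

  // Initializes the CPU inputs, copies them to the GPU, runs both Functions,
  // and checks the outputs with autotest::TensorCheckErr.
  test.run();
}

}  // namespace paddle
```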