提交 4db23bba 编写于 作者: H hedaoyuan 提交者: GitHub

Merge pull request #2618 from hedaoyuan/fix_copyFrom

Change the CpuMatrix::copyFrom and CpuVector::copyFrom with the strea…
......@@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) {
void Layer::copyOutputToOtherDevice() {
for (size_t i = 0; i != outputOtherDevice_.size(); i++) {
SetDevice device(outputOtherDevice_[i].deviceId);
// If outputOtherDevice_[i].value is a CpuMatrix,
// the copyFrom is a synchronous interface.
// If outputOtherDevice_[i].value is a GpuMatrix, since subsequent
// calculations are all on HPPL_STREAM_DEFAULT,
// copyFrom can be an asynchronous interface.
outputOtherDevice_[i].value->copyFrom(*getOutputValue(),
HPPL_STREAM_DEFAULT);
outputOtherDevice_[i].sequenceStartPositions =
......
......@@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) {
const_cast<real*>(src.getData()),
sizeof(real) * elementCnt_,
stream);
// Synchronize the stream so that the copy has completed before returning.
hl_stream_synchronize(stream);
} else if (typeid(src) == typeid(CpuMatrix)) {
memcpy(data_, src.getData(), sizeof(real) * elementCnt_);
} else {
......
......@@ -239,7 +239,8 @@ public:
LOG(FATAL) << "Not implemented";
}
// Copy the contents of src into this matrix using the given stream.
// For GpuMatrix this is an asynchronous copy interface.
// For CpuMatrix this is a synchronous copy interface.
// This base-class version is not implemented and logs a fatal error.
virtual void copyFrom(const Matrix& src, hl_stream_t stream) {
LOG(FATAL) << "Not implemented";
}
......
......@@ -657,6 +657,8 @@ void CpuVectorT<T>::copyFrom(const VectorT<T>& src, hl_stream_t stream) {
(void*)src.getData(),
sizeof(T) * this->getSize(),
stream);
// Synchronize the stream so that the copy has completed before returning.
hl_stream_synchronize(stream);
} else {
src.copyTo(this);
}
......
......@@ -168,11 +168,11 @@ public:
virtual void copyFrom(const VectorT<T>& src) = 0;
/**
 * Copy the contents of src into this vector using the given stream.
 *
 * If GpuVector, this function is an asynchronous interface: it pushes
 * the copy task to the specified stream and returns immediately.
 *
 * If CpuVector, this function is a synchronous interface, the same as
 * copyFrom(const VectorT<T>& src).
 */
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream) = 0;
......
......@@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) {
}
}
TEST(CpuMatrix, copyFrom) {
  // Round trip CpuMatrix -> GpuMatrix -> CpuMatrix and verify that the
  // stream-taking CpuMatrix::copyFrom has finished copying by the time
  // it returns (no explicit synchronization is done by the test).
  const size_t rows = 1000;
  const size_t cols = 1000;

  // Reference data on the host.
  CpuMatrix expected(rows, cols);
  expected.randomizeUniform();

  // Upload the reference data to the device.
  GpuMatrix onDevice(rows, cols);
  onDevice.copyFrom(expected);

  // Download it again through the overload under test.
  CpuMatrix roundTrip(rows, cols);
  roundTrip.copyFrom(onDevice, HPPL_STREAM_DEFAULT);

  TensorCheckEqual(expected, roundTrip);
}
#endif
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册