diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 6839abc1a7bce319ec63b8c87588a9e378aa71ea..11c1f677ae5b308558b54bf49caf168cf6023444 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -296,7 +296,7 @@ function(go_library TARGET_NAME) COMMAND rm -rf ${PADDLE_IN_GOPATH} COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} # Automatically get all dependencies specified in the source code - COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./.. + COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./... # Golang build source code COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h index b33e10e6820129a874f5355d14d8a3e990186025..72c4a7a2a1d1cf93a784f24e687727ee8481484c 100644 --- a/paddle/framework/variable.h +++ b/paddle/framework/variable.h @@ -25,21 +25,24 @@ class Variable { public: template const T& Get() const { - PADDLE_ASSERT(holder_ != nullptr); - PADDLE_ASSERT(std::type_index(typeid(T)) == - std::type_index(holder_->Type())); + PADDLE_ASSERT(IsType()); return *static_cast(holder_->Ptr()); } template T* GetMutable() { - if (holder_ == nullptr || - std::type_index(typeid(T)) != std::type_index(holder_->Type())) { + if (!IsType()) { holder_.reset(new PlaceholderImpl(new T())); } return static_cast(holder_->Ptr()); } + template + bool IsType() const { + return holder_ != nullptr && + std::type_index(typeid(T)) == std::type_index(holder_->Type()); + } + private: struct Placeholder { virtual ~Placeholder() {} diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index 125aaf947f3c9d976b117667d1d1b7700a029cc6..4b92b5d163ad107c0783beae45f8c936112fcccf 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) { void Layer::copyOutputToOtherDevice() { for (size_t i = 0; i != outputOtherDevice_.size(); i++) { SetDevice device(outputOtherDevice_[i].deviceId); + // If outputOtherDevice_[i].value is a CpuMatrix, + // the copyFrom is a synchronous interface. + // If outputOtherDevice_[i].value is a GpuMatrix, since subsequent + // calculations are all on HPPL_STREAM_DEFAULT, + // copyFrom can be an asynchronous interface. outputOtherDevice_[i].value->copyFrom(*getOutputValue(), HPPL_STREAM_DEFAULT); outputOtherDevice_[i].sequenceStartPositions = diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index c910146164ebfb0737583c72c48ce6dbc5b49939..4431d613f655c1d0c8da13bb5ac9225980c650ad 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { const_cast(src.getData()), sizeof(real) * elementCnt_, stream); + // There is a need to add synchronization to ensure that the data is copied. + hl_stream_synchronize(stream); } else if (typeid(src) == typeid(CpuMatrix)) { memcpy(data_, src.getData(), sizeof(real) * elementCnt_); } else { diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index 748be850b4c902d1b48c1dafbb0d5ea2bf197e6e..7dfd593225065e18830b2b0c0ce854fe7a2d5178 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -239,7 +239,8 @@ public: LOG(FATAL) << "Not implemented"; } - // asynchronous copy + // For GpuMatrix this is an asynchronous copy interface + // For CpuMatrix this is an synchronous copy interface virtual void copyFrom(const Matrix& src, hl_stream_t stream) { LOG(FATAL) << "Not implemented"; } diff --git a/paddle/math/Vector.cpp b/paddle/math/Vector.cpp index c519ca500afb1dbfdff6e8d211786f4e18ccf1fd..eb87ee9bb7936d27c0c32a1a4b35ff49871c0a10 100644 --- a/paddle/math/Vector.cpp +++ b/paddle/math/Vector.cpp @@ -657,6 +657,8 @@ void CpuVectorT::copyFrom(const VectorT& src, hl_stream_t stream) { (void*)src.getData(), sizeof(T) * this->getSize(), stream); + // There is a need to add synchronization to ensure that the data is copied. + hl_stream_synchronize(stream); } else { src.copyTo(this); } diff --git a/paddle/math/Vector.h b/paddle/math/Vector.h index 9af6e30c9e13895ad95653a787ec1c1ad77a248f..80b9775fccf10c57bb48145ef56165ec7c86d8b8 100644 --- a/paddle/math/Vector.h +++ b/paddle/math/Vector.h @@ -168,11 +168,11 @@ public: virtual void copyFrom(const VectorT& src) = 0; /** - * If use_gpu, this function will push the copy-task to the specifed-stream - * and return immediately. + * If GpuVector, this function is an asynchronous interface, + * will push the copy-task to the specifed-stream and return immediately. * - * If not use GPU, this function is same as - * the copyFrom(const VectorT& src), which use stream HPPL_STREAM_DEFAULT. + * If CpuVector, this function is an synchronous interface, + * same as the copyFrom(const VectorT& src). */ virtual void copyFrom(const VectorT& src, hl_stream_t stream) = 0; diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 5a0dffe086c4e265d17c79dba435b66c0873e3c7..354f58df39365410ff9aec2576c768e58db9e0d2 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) { } } +TEST(CpuMatrix, copyFrom) { + const size_t height = 1000; + const size_t width = 1000; + CpuMatrix cpu(height, width); + GpuMatrix gpu(height, width); + CpuMatrix copy(height, width); + + cpu.randomizeUniform(); + gpu.copyFrom(cpu); + copy.copyFrom(gpu, HPPL_STREAM_DEFAULT); + + TensorCheckEqual(cpu, copy); +} + #endif diff --git a/paddle/scripts/docker/build.sh b/paddle/scripts/docker/build.sh index 2b48e4dc0f875be9a87797fa14885926999a5010..a182e5f4aef9de8c6f20681328d5ba6c0e6944ef 100644 --- a/paddle/scripts/docker/build.sh +++ b/paddle/scripts/docker/build.sh @@ -31,6 +31,7 @@ Configuring cmake in /paddle/build ... -DWITH_DOC=OFF -DWITH_GPU=${WITH_GPU:-OFF} -DWITH_AVX=${WITH_AVX:-OFF} + -DWITH_GOLANG=${WITH_GOLANG:-OFF} -DWITH_SWIG_PY=ON -DCUDNN_ROOT=/usr/ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} @@ -43,6 +44,7 @@ cmake .. \ -DWITH_DOC=OFF \ -DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \ + -DWITH_GOLANG=${WITH_GOLANG:-OFF} \ -DWITH_SWIG_PY=ON \ -DCUDNN_ROOT=/usr/ \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \