提交 fa4f00d9 编写于 作者: Q qiaolongfei

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into scope-impl

...@@ -296,7 +296,7 @@ function(go_library TARGET_NAME) ...@@ -296,7 +296,7 @@ function(go_library TARGET_NAME)
COMMAND rm -rf ${PADDLE_IN_GOPATH} COMMAND rm -rf ${PADDLE_IN_GOPATH}
COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH} COMMAND ln -sf ${CMAKE_SOURCE_DIR} ${PADDLE_IN_GOPATH}
# Automatically get all dependencies specified in the source code # Automatically get all dependencies specified in the source code
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./.. COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} get -d ./...
# Golang build source code # Golang build source code
COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE} COMMAND env GOPATH=${GOPATH} ${CMAKE_Go_COMPILER} build ${BUILD_MODE}
-o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}" -o "${CMAKE_CURRENT_BINARY_DIR}/${LIB_NAME}"
......
...@@ -25,21 +25,24 @@ class Variable { ...@@ -25,21 +25,24 @@ class Variable {
public: public:
template <typename T> template <typename T>
const T& Get() const { const T& Get() const {
PADDLE_ASSERT(holder_ != nullptr); PADDLE_ASSERT(IsType<T>());
PADDLE_ASSERT(std::type_index(typeid(T)) ==
std::type_index(holder_->Type()));
return *static_cast<const T*>(holder_->Ptr()); return *static_cast<const T*>(holder_->Ptr());
} }
template <typename T> template <typename T>
T* GetMutable() { T* GetMutable() {
if (holder_ == nullptr || if (!IsType<T>()) {
std::type_index(typeid(T)) != std::type_index(holder_->Type())) {
holder_.reset(new PlaceholderImpl<T>(new T())); holder_.reset(new PlaceholderImpl<T>(new T()));
} }
return static_cast<T*>(holder_->Ptr()); return static_cast<T*>(holder_->Ptr());
} }
// Reports whether this variable currently holds a value whose stored type,
// as reported by holder_->Type(), is T. A variable with no holder matches
// no type and yields false.
template <typename T>
bool IsType() const {
  if (holder_ == nullptr) {
    return false;
  }
  const std::type_index wanted(typeid(T));
  const std::type_index actual(holder_->Type());
  return wanted == actual;
}
private: private:
struct Placeholder { struct Placeholder {
virtual ~Placeholder() {} virtual ~Placeholder() {}
......
...@@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) { ...@@ -191,6 +191,11 @@ void Layer::addOutputArgument(int deviceId) {
void Layer::copyOutputToOtherDevice() { void Layer::copyOutputToOtherDevice() {
for (size_t i = 0; i != outputOtherDevice_.size(); i++) { for (size_t i = 0; i != outputOtherDevice_.size(); i++) {
SetDevice device(outputOtherDevice_[i].deviceId); SetDevice device(outputOtherDevice_[i].deviceId);
// If outputOtherDevice_[i].value is a CpuMatrix,
// the copyFrom is a synchronous interface.
// If outputOtherDevice_[i].value is a GpuMatrix, since subsequent
// calculations are all on HPPL_STREAM_DEFAULT,
// copyFrom can be an asynchronous interface.
outputOtherDevice_[i].value->copyFrom(*getOutputValue(), outputOtherDevice_[i].value->copyFrom(*getOutputValue(),
HPPL_STREAM_DEFAULT); HPPL_STREAM_DEFAULT);
outputOtherDevice_[i].sequenceStartPositions = outputOtherDevice_[i].sequenceStartPositions =
......
...@@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) { ...@@ -1565,6 +1565,8 @@ void CpuMatrix::copyFrom(const Matrix& src, hl_stream_t stream) {
const_cast<real*>(src.getData()), const_cast<real*>(src.getData()),
sizeof(real) * elementCnt_, sizeof(real) * elementCnt_,
stream); stream);
// There is a need to add synchronization to ensure that the data is copied.
hl_stream_synchronize(stream);
} else if (typeid(src) == typeid(CpuMatrix)) { } else if (typeid(src) == typeid(CpuMatrix)) {
memcpy(data_, src.getData(), sizeof(real) * elementCnt_); memcpy(data_, src.getData(), sizeof(real) * elementCnt_);
} else { } else {
......
...@@ -239,7 +239,8 @@ public: ...@@ -239,7 +239,8 @@ public:
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
// asynchronous copy // For GpuMatrix this is an asynchronous copy interface
// For CpuMatrix this is a synchronous copy interface
virtual void copyFrom(const Matrix& src, hl_stream_t stream) { virtual void copyFrom(const Matrix& src, hl_stream_t stream) {
LOG(FATAL) << "Not implemented"; LOG(FATAL) << "Not implemented";
} }
......
...@@ -657,6 +657,8 @@ void CpuVectorT<T>::copyFrom(const VectorT<T>& src, hl_stream_t stream) { ...@@ -657,6 +657,8 @@ void CpuVectorT<T>::copyFrom(const VectorT<T>& src, hl_stream_t stream) {
(void*)src.getData(), (void*)src.getData(),
sizeof(T) * this->getSize(), sizeof(T) * this->getSize(),
stream); stream);
// There is a need to add synchronization to ensure that the data is copied.
hl_stream_synchronize(stream);
} else { } else {
src.copyTo(this); src.copyTo(this);
} }
......
...@@ -168,11 +168,11 @@ public: ...@@ -168,11 +168,11 @@ public:
virtual void copyFrom(const VectorT<T>& src) = 0; virtual void copyFrom(const VectorT<T>& src) = 0;
/** /**
* If use_gpu, this function will push the copy-task to the specified stream * If GpuVector, this function is an asynchronous interface,
* and return immediately. * will push the copy-task to the specified stream and return immediately.
* *
* If not use GPU, this function is same as * If CpuVector, this function is a synchronous interface,
* the copyFrom(const VectorT<T>& src), which use stream HPPL_STREAM_DEFAULT. * same as the copyFrom(const VectorT<T>& src).
*/ */
virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream) = 0; virtual void copyFrom(const VectorT<T>& src, hl_stream_t stream) = 0;
......
...@@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) { ...@@ -1127,4 +1127,18 @@ TEST(Matrix, MaxOutFwdBwd) {
} }
} }
// Round-trips random data CPU -> GPU -> CPU and checks that the final CPU
// matrix equals the source. The copy back uses the stream overload of
// copyFrom and nothing synchronizes before the comparison, so the check
// relies on that call having finished the copy by the time it returns.
TEST(CpuMatrix, copyFrom) {
  const size_t rows = 1000;
  const size_t cols = 1000;

  CpuMatrix source(rows, cols);
  GpuMatrix device(rows, cols);
  CpuMatrix roundTrip(rows, cols);

  source.randomizeUniform();
  device.copyFrom(source);
  roundTrip.copyFrom(device, HPPL_STREAM_DEFAULT);

  TensorCheckEqual(source, roundTrip);
}
#endif #endif
...@@ -31,6 +31,7 @@ Configuring cmake in /paddle/build ... ...@@ -31,6 +31,7 @@ Configuring cmake in /paddle/build ...
-DWITH_DOC=OFF -DWITH_DOC=OFF
-DWITH_GPU=${WITH_GPU:-OFF} -DWITH_GPU=${WITH_GPU:-OFF}
-DWITH_AVX=${WITH_AVX:-OFF} -DWITH_AVX=${WITH_AVX:-OFF}
-DWITH_GOLANG=${WITH_GOLANG:-OFF}
-DWITH_SWIG_PY=ON -DWITH_SWIG_PY=ON
-DCUDNN_ROOT=/usr/ -DCUDNN_ROOT=/usr/
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF}
...@@ -43,6 +44,7 @@ cmake .. \ ...@@ -43,6 +44,7 @@ cmake .. \
-DWITH_DOC=OFF \ -DWITH_DOC=OFF \
-DWITH_GPU=${WITH_GPU:-OFF} \ -DWITH_GPU=${WITH_GPU:-OFF} \
-DWITH_AVX=${WITH_AVX:-OFF} \ -DWITH_AVX=${WITH_AVX:-OFF} \
-DWITH_GOLANG=${WITH_GOLANG:-OFF} \
-DWITH_SWIG_PY=ON \ -DWITH_SWIG_PY=ON \
-DCUDNN_ROOT=/usr/ \ -DCUDNN_ROOT=/usr/ \
-DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \ -DWITH_STYLE_CHECK=${WITH_STYLE_CHECK:-OFF} \
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册