diff --git a/doc/design/if_else_op.md b/doc/design/if_else_op.md index 7370c2a24fa644a64e738f202bac9b9209642e08..954a19c0733358c235eae3cffe134c23dac94c95 100644 --- a/doc/design/if_else_op.md +++ b/doc/design/if_else_op.md @@ -1,22 +1,4 @@ -IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has M (M<=N) instances, each corresponds to a true element in `cond`. - -```python -import paddle as pd - -x = var() -y = var() -cond = var() - -b = pd.create_ifop(inputs=[x], output_num=1) -with b.true_block(): - x = b.inputs(0) - z = operator.add(x, y) - b.set_output(0, operator.softmax(z)) - -out = b(cond) -``` - -If we want the output still has N instances, we can use IfElseOp with a default value, whose minibatch size must be N: +IfOp should have only one branch. An IfOp operator takes a `cond` variable whose value must be a vector of N boolean elements. Its return value has N instances. If cond[i] == True, input instance input[i] will go through true_block() and generate output[i]; otherwise it will produce output from false_bloack(). ```python import paddle as pd @@ -39,7 +21,7 @@ with b.false_block(): out = b(cond) ``` -If only true_block is set in an IfElseOp, we can have a default value for false as: +If only true_block is set in an IfElseOp, a special case is that we can have a default value for false as: ```python import paddle as pd diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h index 642b53efc7095d25712ca324638f5fe9b8316c0c..ed166935f76be9d25062b5e69536c7b7ac19045d 100644 --- a/paddle/framework/tensor_impl.h +++ b/paddle/framework/tensor_impl.h @@ -22,7 +22,7 @@ namespace framework { template inline void Tensor::check_memory_size() const { PADDLE_ENFORCE_NOT_NULL( - holder_, "Tenosr holds no memory. Call Tensor::mutable_data first."); + holder_, "Tensor holds no memory. Call Tensor::mutable_data first."); PADDLE_ENFORCE_GE( holder_->size(), numel() * sizeof(T) + offset_, "Tensor's dims_ is out of bound. Call Tensor::mutable_data " diff --git a/paddle/framework/tensor_test.cc b/paddle/framework/tensor_test.cc index 55302ea47120f420e952b26830c8ea4cbcce6435..e2ec738de35c90c6a06c9a46b062d4cce55f5eda 100644 --- a/paddle/framework/tensor_test.cc +++ b/paddle/framework/tensor_test.cc @@ -36,7 +36,7 @@ TEST(Tensor, DataAssert) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "holder_ should not be null\nTenosr holds no memory. Call " + "holder_ should not be null\nTensor holds no memory. Call " "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { @@ -112,7 +112,7 @@ TEST(Tensor, ShareDataWith) { } catch (paddle::platform::EnforceNotMet err) { caught = true; std::string msg = - "holder_ should not be null\nTenosr holds no memory. Call " + "holder_ should not be null\nTensor holds no memory. Call " "Tensor::mutable_data first."; const char* what = err.what(); for (size_t i = 0; i < msg.length(); ++i) { @@ -274,4 +274,4 @@ TEST(Tensor, ReshapeToMatrix) { Tensor res = ReshapeToMatrix(src, 2); ASSERT_EQ(res.dims()[0], 2 * 3); ASSERT_EQ(res.dims()[1], 4 * 9); -} \ No newline at end of file +} diff --git a/paddle/gserver/layers/MKLDNNConvLayer.cpp b/paddle/gserver/layers/MKLDNNConvLayer.cpp index f8c06c5f868f8d48a9a222b92315ee0ef2cf265e..9088744beebd25ac105737fe3b012de143c66a7c 100644 --- a/paddle/gserver/layers/MKLDNNConvLayer.cpp +++ b/paddle/gserver/layers/MKLDNNConvLayer.cpp @@ -285,10 +285,9 @@ void MKLDNNConvLayer::resetWgtBiasValue( wgt = MKLDNNMatrix::create(weight_->getW(), pd->weights_primitive_desc()); VLOG(MKLDNN_FMTS) << "Weight value format: " << wgt->getFormat(); - bias = nullptr; - if (biases_ && biases_->getW()) { - bias = MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()); - } + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), pd->bias_primitive_desc()) + : nullptr; } void MKLDNNConvLayer::resetOutValue( @@ -356,6 +355,7 @@ void MKLDNNConvLayer::resetBwdWgtPD( void MKLDNNConvLayer::resetBwdDataPD( std::shared_ptr& pd) { + pd = nullptr; if (inputLayers_[0]->getOutput().grad == nullptr) { return; } @@ -476,6 +476,7 @@ void MKLDNNConvLayer::resetWgtBiasGrad( << "primitive desc of weight grad and value should be equal"; VLOG(MKLDNN_FMTS) << "weight grad format: " << wgt->getFormat(); + bias = nullptr; if (biasVal_ == nullptr) { return; } diff --git a/paddle/gserver/layers/MKLDNNFcLayer.cpp b/paddle/gserver/layers/MKLDNNFcLayer.cpp index f70343251ad4fbb99f9614618f6d1bff1174f15e..f60e221a6ec2ff513789a24e9f59bb25aef437b5 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.cpp +++ b/paddle/gserver/layers/MKLDNNFcLayer.cpp @@ -17,9 +17,6 @@ limitations under the License. */ using namespace mkldnn; // NOLINT typedef memory::format format; -typedef inner_product_forward fc_fwd; -typedef inner_product_backward_weights fc_bwdWgt; -typedef inner_product_backward_data fc_bwdData; namespace paddle { @@ -93,35 +90,88 @@ void MKLDNNFcLayer::reshape( printSizeInfo(); } -void MKLDNNFcLayer::resetFwd(std::vector& pipeline, +void MKLDNNFcLayer::resetFwd(std::vector& pipeline, MKLDNNMatrixPtr& in, MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias, MKLDNNMatrixPtr& out) { - pipeline.clear(); - bool hasBias = biases_ && biases_->getW(); - const MatrixPtr& wgtVal = weight_->getW(); - const MatrixPtr& biasVal = hasBias ? biases_->getW() : nullptr; - const MatrixPtr& outVal = output_.value; + resetFwdBuffers(in, wgt, bias, out); + + resetFwdPD(fwdPD_, in, wgt, bias, out); + + resetFwdPipeline(pipeline, fwdPD_, in, wgt, bias, out); + + printValueFormatFlow(); +} + +void MKLDNNFcLayer::resetBwd(std::vector& pipeline, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + std::shared_ptr bwdWgtPD; + std::shared_ptr bwdDataPD; + + resetBwdBuffers(in, wgt, bias, out); + + resetBwdWgtPD(bwdWgtPD, wgt, bias, out); + + resetBwdDataPD(bwdDataPD, in, out); + + resetBwdPipeline(pipeline, bwdWgtPD, bwdDataPD, in, wgt, bias, out); + + printGradFormatFlow(); +} + +void MKLDNNFcLayer::updateInputData() { + inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +} +void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { + weight_->getParameterPtr()->incUpdate(callback); + if (biases_ && biases_->getWGrad()) { + biases_->getParameterPtr()->incUpdate(callback); + } +} + +void MKLDNNFcLayer::resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetInValue(in); + + resetWgtBiasValue(wgt, bias); + + resetOutValue(out); +} + +void MKLDNNFcLayer::resetInValue(MKLDNNMatrixPtr& in) { if (inputIsOnlyMKLDNN()) { - const MatrixPtr& inVal = getInputValue(0); - in = std::dynamic_pointer_cast(inVal); + const MatrixPtr& dnnIn = getInputValue(0); + in = std::dynamic_pointer_cast(dnnIn); CHECK(in) << "Input should be MKLDNNMatrix"; } else { CHECK_EQ(getPrev(0)->getDeviceId(), CPU_DEVICE) << "Only support CPU yet"; - const MatrixPtr& inVal = getInputValue(0, CPU_DEVICE); + const MatrixPtr& cpuIn = getInputValue(0, CPU_DEVICE); in = MKLDNNMatrix::create( - inVal, memory::dims{bs_, ic_, ih_, iw_}, format::nchw, engine_); + cpuIn, {bs_, ic_, ih_, iw_}, format::nchw, engine_); } in->downSpatial(); +} + +void MKLDNNFcLayer::resetWgtBiasValue(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { wgt = MKLDNNMatrix::create( - wgtVal, memory::dims{oc_, ic_, ih_, iw_}, format::oihw, engine_); + weight_->getW(), {oc_, ic_, ih_, iw_}, format::oihw, engine_); wgt->downSpatial(); - bias = hasBias ? MKLDNNMatrix::create(biasVal, {oc_}, format::x, engine_) - : nullptr; - out = MKLDNNMatrix::create(outVal, {bs_, oc_}, format::nc, engine_); + bias = (biases_ && biases_->getW()) + ? MKLDNNMatrix::create(biases_->getW(), {oc_}, format::x, engine_) + : nullptr; +} + +void MKLDNNFcLayer::resetOutValue(MKLDNNMatrixPtr& out) { + out = MKLDNNMatrix::create(output_.value, {bs_, oc_}, format::nc, engine_); // change original output value to mkldnn output value output_.value = std::dynamic_pointer_cast(out); if (!outputIsOnlyMKLDNN()) { @@ -129,46 +179,59 @@ void MKLDNNFcLayer::resetFwd(std::vector& pipeline, // just share point getOutput(CPU_DEVICE).value->setData(output_.value->getData()); } +} - // create forward handle +void MKLDNNFcLayer::resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out) { + CHECK(in); + CHECK(wgt); + CHECK(out); prop_kind pk = prop_kind::forward; - fc_fwd::desc fwdDesc = hasBias ? fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_fwd::desc(pk, - in->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - if (hasBias) { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *bias, *out)); + fc_fwd::desc fwdDesc = bias != nullptr ? fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_fwd::desc(pk, + in->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_fwd::primitive_desc(fwdDesc, engine_)); +} + +void MKLDNNFcLayer::resetFwdPipeline( + std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + + if (bias) { + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *bias, *out)); } else { - fwd_.reset(new fc_fwd(fwdPD, *in, *wgt, *out)); + fwd_.reset(new fc_fwd(*pd, *in, *wgt, *out)); } - printValueFormatFlow(); pipeline.push_back(*fwd_); } -void MKLDNNFcLayer::resetBwd(std::vector& pipeline, - MKLDNNMatrixPtr& in, - MKLDNNMatrixPtr& wgt, - MKLDNNMatrixPtr& bias, - MKLDNNMatrixPtr& out) { - pipeline.clear(); - if (!needResetBwd_) { - return; - } - needResetBwd_ = false; - bool hasBias = biases_ && biases_->getWGrad(); +void MKLDNNFcLayer::resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + resetOutGrad(out); + + resetWgtBiasGrad(wgt, bias); - /// backward weight - CHECK(inVal_) << "Should have input value"; - const MatrixPtr& wgtGrad = weight_->getWGrad(); - const MatrixPtr& biasGrad = hasBias ? biases_->getWGrad() : nullptr; + resetInGrad(in); +} +void MKLDNNFcLayer::resetOutGrad(MKLDNNMatrixPtr& out) { // TODO(TJ): merge outgrad int device = outputIsOnlyMKLDNN() ? MKLDNN_DEVICE : CPU_DEVICE; // for MKLDNN device: @@ -178,66 +241,88 @@ void MKLDNNFcLayer::resetBwd(std::vector& pipeline, // for CPU device: // fc do not need to convert from cpu device since output is always nc format // only need create from cpu device - const MatrixPtr& outGrad = getOutput(device).grad; - out = MKLDNNMatrix::create(outGrad, outVal_->getPrimitiveDesc()); - wgt = MKLDNNMatrix::create(wgtGrad, wgtVal_->getPrimitiveDesc()); - bias = hasBias ? MKLDNNMatrix::create(biasGrad, biasVal_->getPrimitiveDesc()) - : nullptr; - - // create memory primitive desc - fc_fwd::desc fwdDesc = fc_fwd::desc(prop_kind::forward, - inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_fwd::primitive_desc fwdPD = fc_fwd::primitive_desc(fwdDesc, engine_); - fc_bwdWgt::desc bwdWgtDesc = hasBias - ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - bias->getMemoryDesc(), - out->getMemoryDesc()) - : fc_bwdWgt::desc(inVal_->getMemoryDesc(), - wgt->getMemoryDesc(), - out->getMemoryDesc()); - fc_bwdWgt::primitive_desc bwdWgtPD = - fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, fwdPD); - - if (hasBias) { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt, *bias)); - } else { - bwdWgt_.reset(new fc_bwdWgt(bwdWgtPD, *inVal_, *out, *wgt)); + CHECK(outVal_); + out = + MKLDNNMatrix::create(getOutput(device).grad, outVal_->getPrimitiveDesc()); +} + +void MKLDNNFcLayer::resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias) { + CHECK(wgtVal_); + wgt = MKLDNNMatrix::create(weight_->getWGrad(), wgtVal_->getPrimitiveDesc()); + + bias = nullptr; + if (biasVal_ == nullptr) { + return; } - pipeline.push_back(*bwdWgt_); + bias = + MKLDNNMatrix::create(biases_->getWGrad(), biasVal_->getPrimitiveDesc()); +} - /// backward data +void MKLDNNFcLayer::resetInGrad(MKLDNNMatrixPtr& in) { + in = nullptr; const MatrixPtr& inGrad = inputLayers_[0]->getOutput().grad; if (inGrad == nullptr) { return; } - if (getInput(0, MKLDNN_DEVICE).getAllCount() > 1) { - // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done - } else { - in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); - } - - fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( - inVal_->getMemoryDesc(), wgt->getMemoryDesc(), out->getMemoryDesc()); - fc_bwdData::primitive_desc bwdDataPD = - fc_bwdData::primitive_desc(bwdDataDesc, engine_, fwdPD); + // TODO(TJ): use outputMaps_ ways to get the inGrad_ when merge outgrad done + CHECK(inVal_); + in = MKLDNNMatrix::create(inGrad, inVal_->getPrimitiveDesc()); +} - CHECK(wgtVal_) << "Should have weight memory"; - bwdData_.reset(new fc_bwdData(bwdDataPD, *out, *wgtVal_, *in)); - printGradFormatFlow(); - pipeline.push_back(*bwdData_); +void MKLDNNFcLayer::resetBwdWgtPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + CHECK(inVal_); + fc_bwdWgt::desc bwdWgtDesc = bias ? fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + bias->getMemoryDesc(), + out->getMemoryDesc()) + : fc_bwdWgt::desc(inVal_->getMemoryDesc(), + wgt->getMemoryDesc(), + out->getMemoryDesc()); + pd.reset(new fc_bwdWgt::primitive_desc(bwdWgtDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateInputData() { - inVal_->setData(getInputValue(0, CPU_DEVICE)->getData()); +void MKLDNNFcLayer::resetBwdDataPD( + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out) { + pd = nullptr; + if (in == nullptr) { + return; + } + CHECK(wgtVal_); + fc_bwdData::desc bwdDataDesc = fc_bwdData::desc( + in->getMemoryDesc(), wgtVal_->getMemoryDesc(), out->getMemoryDesc()); + pd.reset(new fc_bwdData::primitive_desc(bwdDataDesc, engine_, *fwdPD_)); } -void MKLDNNFcLayer::updateWeights(const UpdateCallback& callback) { - weight_->getParameterPtr()->incUpdate(callback); - if (biases_ && biases_->getWGrad()) { - biases_->getParameterPtr()->incUpdate(callback); +void MKLDNNFcLayer::resetBwdPipeline( + std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out) { + pipeline.clear(); + CHECK(inVal_); + if (bias) { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt, *bias)); + } else { + bwdWgt_.reset(new fc_bwdWgt(*bwdWgtPD, *inVal_, *out, *wgt)); + } + pipeline.push_back(*bwdWgt_); + + if (bwdDataPD == nullptr) { + return; } + CHECK(wgtVal_) << "Should have weight memory"; + bwdData_.reset(new fc_bwdData(*bwdDataPD, *out, *wgtVal_, *in)); + pipeline.push_back(*bwdData_); } + } // namespace paddle diff --git a/paddle/gserver/layers/MKLDNNFcLayer.h b/paddle/gserver/layers/MKLDNNFcLayer.h index 3119f863496df092da13c08bf733f13c42e53780..c76878aafab7e986d2bf478eaba02f2f0aced293 100644 --- a/paddle/gserver/layers/MKLDNNFcLayer.h +++ b/paddle/gserver/layers/MKLDNNFcLayer.h @@ -18,6 +18,9 @@ limitations under the License. */ #include "mkldnn.hpp" namespace paddle { +typedef mkldnn::inner_product_forward fc_fwd; +typedef mkldnn::inner_product_backward_weights fc_bwdWgt; +typedef mkldnn::inner_product_backward_data fc_bwdData; /** * @brief A subclass of MKLDNNLayer fc layer. @@ -32,6 +35,9 @@ protected: // if has already init the weight bool hasInitedWgt_; + // save forward primitive_desc, which can be used backward + std::shared_ptr fwdPD_; + // fc weight and bias std::unique_ptr weight_; std::unique_ptr biases_; @@ -67,6 +73,59 @@ public: void convertWeightsFromPaddle() override; void convertWeightsToPaddle() override; + +protected: + /** + * Forward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor, + * reset pipeline. + */ + void resetFwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetInValue(MKLDNNMatrixPtr& in); + void resetWgtBiasValue(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetOutValue(MKLDNNMatrixPtr& out); + void resetFwdPD(std::shared_ptr& pd, + MKLDNNMatrixPtr in, + MKLDNNMatrixPtr wgt, + MKLDNNMatrixPtr bias, + MKLDNNMatrixPtr out); + void resetFwdPipeline(std::vector& pipeline, + std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + + /** + * Backward functions: reset buffers(input, output, weight and bias), + * reset primitive descriptor for backward weight, + * reset primitive descriptor for backward data, + * reset pipeline. + */ + void resetBwdBuffers(MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetOutGrad(MKLDNNMatrixPtr& out); + void resetWgtBiasGrad(MKLDNNMatrixPtr& wgt, MKLDNNMatrixPtr& bias); + void resetInGrad(MKLDNNMatrixPtr& in); + void resetBwdWgtPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); + void resetBwdDataPD(std::shared_ptr& pd, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& out); + void resetBwdPipeline(std::vector& pipeline, + std::shared_ptr& bwdWgtPD, + std::shared_ptr& bwdDataPD, + MKLDNNMatrixPtr& in, + MKLDNNMatrixPtr& wgt, + MKLDNNMatrixPtr& bias, + MKLDNNMatrixPtr& out); }; } // namespace paddle diff --git a/paddle/math/MKLDNNMatrix.h b/paddle/math/MKLDNNMatrix.h index 0aa130b4a0d458ad78d5d1330164af9e73b22a44..c843115eb9a5be50d6ff873f1510844228c9d89f 100644 --- a/paddle/math/MKLDNNMatrix.h +++ b/paddle/math/MKLDNNMatrix.h @@ -66,11 +66,12 @@ public: /** * Create reorder primitive. * Create a mkldnn::reorder handle for converting src MKLDNNMatrix to dst. - * checkData: for whether to check the data handle of src and dst is the same. - * if true, means check it and do not want support inplace reorder; - * otherwise do not check data which means the created reorder - * maybe inplace buffer and do not guarantee the logical is correct - * since not all format or conversion support inplace. + * checkData: whether to check the data handle of src and dst. + * if true, it will check the data and do not allow them equal; + * otherwise, it will not check them, then the reorder created + * may have inplace buffer. + * Do not set false, if you can not guarantee the inplace logical + * would work with your reorder. */ static std::shared_ptr createReorder( const MKLDNNMatrixPtr& src, diff --git a/paddle/memory/memcpy.cc b/paddle/memory/memcpy.cc index a19a3e3675e3e2e7cc0c3594f21191f932d6379f..19ec9ba9b26f5919796181a19a048b7edb508bdd 100644 --- a/paddle/memory/memcpy.cc +++ b/paddle/memory/memcpy.cc @@ -62,6 +62,24 @@ void Copy(platform::GPUPlace dst_place, } } +template <> +void Copy(platform::CPUPlace dst_place, + void* dst, + platform::GPUPlace src_place, + const void* src, size_t num) { + platform::SetDeviceId(src_place.device); + platform::GpuMemcpySync(dst, src, num, cudaMemcpyDeviceToHost); +} + +template <> +void Copy(platform::GPUPlace dst_place, + void* dst, + platform::CPUPlace src_place, + const void* src, size_t num) { + platform::SetDeviceId(dst_place.device); + platform::GpuMemcpySync(dst, src, num, cudaMemcpyHostToDevice); +} + #endif // PADDLE_ONLY_CPU } // namespace memory diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 5b65584327e8afca383d2171cb42e3984baa8654..e3e934bcccd1a5f34d88a2f33f3708a46ddabe05 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -80,9 +80,11 @@ endfunction() add_subdirectory(math) set(DEPS_OPS - recurrent_op) + recurrent_op + cond_op) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor net_op) +op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) diff --git a/paddle/operators/accuracy_op.cc b/paddle/operators/accuracy_op.cc index 4a6c6381b0341dd3531aa4c09024530ee67bb4f9..0c813748b2989a8f0c00a359345747242dd21dd8 100644 --- a/paddle/operators/accuracy_op.cc +++ b/paddle/operators/accuracy_op.cc @@ -23,10 +23,15 @@ class AccuracyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Inference"), - "Input of Inference must be initialized."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Inference"), + "Input(Inference) of AccuracyOp should not be null."); PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Label"), - "Input of Inference must be initialized."); + "Input(Label) of AccuracyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Accuracy"), + "Output(Accuracy) of AccuracyOp should not be null."); + auto *inference = ctx.Input("Inference"); auto *label = ctx.Input("Label"); diff --git a/paddle/operators/add_op.cc b/paddle/operators/add_op.cc index b43c09d4f09c7f87cc60290bdd2a99cbe46f0d5c..e83c1efeaf897889d18a37a6bd2ca2f8f012db25 100644 --- a/paddle/operators/add_op.cc +++ b/paddle/operators/add_op.cc @@ -23,6 +23,13 @@ class AddOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of AddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of AddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of AddOp should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("X")->dims(), ctx.Input("Y")->dims(), "Two input of Add Op's dimension must be same."); diff --git a/paddle/operators/concat_op.cc b/paddle/operators/concat_op.cc index 72fd179354a4be76a37e4571da168d844f7ce384..223bb0ffe6e75ce71919eb5f4cca06bedbb00764 100644 --- a/paddle/operators/concat_op.cc +++ b/paddle/operators/concat_op.cc @@ -25,6 +25,9 @@ class ConcatOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ConcatOp should not be null."); + auto ins = ctx.MultiInput("X"); auto *out = ctx.Output("Out"); size_t axis = static_cast(ctx.Attr("axis")); diff --git a/paddle/operators/cond_op.cc b/paddle/operators/cond_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..8262a7a5c8c13c86c5f6c123a14fa89696358c57 --- /dev/null +++ b/paddle/operators/cond_op.cc @@ -0,0 +1,229 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/operators/cond_op.h" + +#include +#include + +#include "paddle/framework/op_registry.h" +#include "paddle/operators/gather.h" +#include "paddle/operators/net_op.h" +#include "paddle/operators/scatter.h" + +namespace paddle { +namespace operators { + +using Scope = framework::Scope; +using Variable = framework::Variable; +using Tensor = framework::Tensor; +using LoDTensor = framework::LoDTensor; +using DDim = framework::DDim; + +void CondOp::CreateScope(const Scope& scope) const { + auto sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto sub_scopes = sub_scopes_var->GetMutable>(); + auto& sub_scope = scope.NewScope(); + sub_scopes->push_back(&sub_scope); +} + +void CondOp::CreateIndexTensor(const Scope& scope) const { + auto index_tensors_var = scope.FindVar("IndexTensors"); + PADDLE_ENFORCE_NOT_NULL(index_tensors_var, + "Output(IndexTensors) of CondOp should not be null."); + auto& index_tensors = + *index_tensors_var->GetMutable>(); + index_tensors.push_back(LoDTensor()); +} + +void CondOp::InferShape(const Scope& scope) const { + auto sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto& sub_scopes = *sub_scopes_var->GetMutable>(); + + for (int i = 0; i < 2; ++i) { + // Create two sub scopes for true and false branches + // sub_scopes[0] for the true branch and sub_scopes[1] for the false + // branch + CreateScope(scope); + + // Create two tensors for true and false indices + // index_tensors[0] for the true branch and index_tensors[1] for the false + // branch + CreateIndexTensor(scope); + + PADDLE_ENFORCE(!Inputs("Xs").empty(), + "Inputs(Xs) of CondOp can't be empty."); + for (auto& input : Inputs("Xs")) { + // Create a new tensor in sub-scope for input-type tensor + Variable* v = sub_scopes[i]->NewVar(input); + LoDTensor* sub_input = v->GetMutable(); + sub_input->Resize(scope.FindVar(input)->GetMutable()->dims()); + } + + for (auto& output : (*sub_net_op_[i]).Outputs()) { + for (auto& var_name : output.second) { + sub_scopes[i]->NewVar(var_name); + } + } + + // each net calls InferShape + sub_net_op_[i]->InferShape(*sub_scopes[i]); + } + + for (auto& output : Outputs("Outs")) { + LoDTensor* tensor_t_out = + sub_scopes[0]->FindVar(output)->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_t_out, "True output should not be NULL"); + LoDTensor* tensor_f_out = + sub_scopes[1]->FindVar(output)->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_f_out, "False output should not be NULL"); + + auto* tensor_out_var = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(tensor_out_var, "Output not found"); + LoDTensor* tensor_out = tensor_out_var->GetMutable(); + PADDLE_ENFORCE_NOT_NULL(tensor_t_out, + "True output tensor should not be NULL"); + + // check output size should be same + PADDLE_ENFORCE_EQ(tensor_t_out->dims(), tensor_f_out->dims(), + "Outputs not of the same shape"); + tensor_out->Resize(tensor_t_out->dims()); + // tensor_out->mutable_data(tensor_out->dims(), + // platform::CPUPlace()); + tensor_out->mutable_data(platform::CPUPlace()); + } +} + +void CondOp::Run(const Scope& scope, + const platform::DeviceContext& dev_ctx) const { + auto* sub_scopes_var = scope.FindVar("SubScopes"); + PADDLE_ENFORCE_NOT_NULL(sub_scopes_var, + "Output(SubScopes) of CondOp should not be null."); + auto sub_scopes = sub_scopes_var->Get>(); + auto* index_tensors_var = scope.FindVar("IndexTensors"); + PADDLE_ENFORCE_NOT_NULL(index_tensors_var, + "Output(IndexTensors) of CondOp should not be null."); + auto index_tensors = index_tensors_var->Get>(); + + std::string cond_name = Input("Cond"); + Variable* cond_var = scope.FindVar(cond_name); + PADDLE_ENFORCE_NOT_NULL(cond_var, + "Input(Cond) of CondOp should not be null."); + const LoDTensor* cond = cond_var->GetMutable(); + + // Step 1: get the true/false index at runtime + // index_[0]: vector, contains all index for cond[i] == true + // index_[1]: vector, contains all index for cond[i] == false + for (int i = 0; i < 2; ++i) index_[i].clear(); + + const int* cond_data = cond->data(); + for (int i = 0; i < cond->dims()[0]; ++i) { + if (cond_data[i]) + index_[0].push_back(i); + else + index_[1].push_back(i); + } + + // put index_[0] and index_[1] into two tensors: + // index_tensor_[0] and index_tensor_[1] + DDim dim = paddle::framework::make_ddim({0}); + for (int i = 0; i < 2; ++i) { + dim[0] = index_[i].size(); + int* tmp_ptr = + index_tensors[i].mutable_data(dim, platform::CPUPlace()); + index_tensors[i].Resize(dim); + memcpy(tmp_ptr, index_[i].data(), dim[0] * sizeof(int)); + } + + // Step 2: collect data by calling gather + for (int i = 0; i < 2; ++i) { + // i= 0/i for True and False branches respectively + for (auto& input : Inputs("Xs")) { + // find Tensor + Variable* v = scope.FindVar(input); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_parent = v->GetMutable(); + + v = sub_scopes[i]->FindVar(input); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_child = v->GetMutable(); + + // Resize child + DDim dim = tensor_child->dims(); + dim[0] = index_[i].size(); + tensor_child->Resize(dim); + tensor_child->mutable_data(dim, platform::CPUPlace()); + + Gather(dev_ctx.GetPlace(), tensor_parent, &index_tensors[i], + tensor_child); + } + } + + // Step 3: run + for (int i = 0; i < 2; ++i) { + sub_net_op_[i]->Run(*sub_scopes[i], dev_ctx); + } + + // Step 4: merge output results + PADDLE_ENFORCE(!Outputs("Outs").empty(), + "Outputs(Outs) of CondOp can't be empty."); + for (int i = 0; i < 2; ++i) { + // i= 0/i for True and False branches respectively + for (auto& output : Outputs("Outs")) { + // find Tensor + Variable* v = scope.FindVar(output); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_parent = v->GetMutable(); + + v = sub_scopes[i]->FindVar(output); + PADDLE_ENFORCE_NOT_NULL(v); + LoDTensor* tensor_child = v->GetMutable(); + + ScatterUpdate(dev_ctx.GetPlace(), tensor_child, &index_tensors[i], + tensor_parent); + } + } +} + +class CondOpProtoAndCheckerMaker : public framework::OpProtoAndCheckerMaker { + public: + CondOpProtoAndCheckerMaker(framework::OpProto* proto, + framework::OpAttrChecker* op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("Cond", "The condition, which is a bool vector"); + AddInput("Xs", "Inputs of Subnets").AsDuplicable(); + AddOutput("Outs", "Outputs of Cond_Op after merge").AsDuplicable(); + + AddOutput("SubScopes", "sub scopes for true and false branches"); + AddOutput("IndexTensors", "Index Tensors contains indices for true/false"); + + AddComment(R"DOC( +Sample dependent Cond Operator: +Given Cond[i] as a 1/0 vector to indicate true/false +The equation is: +Out[i] = subnet_t[i], if Cond[i] == true +Out[i] = subnet_t[i], if Cond[i] == false +)DOC"); + } +}; + +} // namespace operators +} // namespace paddle + +REGISTER_OP_WITHOUT_GRADIENT(cond, paddle::operators::CondOp, + paddle::operators::CondOpProtoAndCheckerMaker); diff --git a/paddle/operators/cond_op.h b/paddle/operators/cond_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b09e32331e66c53555c88c06d7b1456276050eaa --- /dev/null +++ b/paddle/operators/cond_op.h @@ -0,0 +1,91 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "glog/logging.h" +#include "paddle/framework/ddim.h" +#include "paddle/framework/eigen.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/tensor.h" +#include "paddle/operators/net_op.h" + +namespace paddle { +namespace operators { + +/* + * @brief CondOp is a dynamic if-else Operator + * + * It has a input tensor named cond indicating which netop each instance will + * run. + * + * if cond == 1, it will run true_net, which is a NetOp. + * + * if cond == 0, it will run false_net, which is another NetOp. + */ +class CondOp : public framework::OperatorBase { + public: + CondOp(const std::string& type, const framework::VariableNameMap& inputs, + const framework::VariableNameMap& outputs, + const framework::AttributeMap& attrs) + : OperatorBase(type, inputs, outputs, attrs) { + index_.resize(2); + sub_net_op_.resize(2); + } + + CondOp(const CondOp& o) + : framework::OperatorBase( + static_cast(o)) { + // TODO(yuyang18): Implement copy ctor well. + PADDLE_THROW("Not implemented"); + } + + void CreateScope(const framework::Scope& scope) const; + + void CreateIndexTensor(const framework::Scope& scope) const; + + /* + * InferShape must be called before Run. + */ + void InferShape(const framework::Scope& scope) const override; + + /* + * Set True Block + */ + void set_truenet(std::unique_ptr&& net) { + sub_net_op_[0] = std::move(net); + } + + /* + * Set False Block + */ + void set_falsenet(std::unique_ptr&& net) { + sub_net_op_[1] = std::move(net); + } + + void Run(const framework::Scope& scope, + const platform::DeviceContext& dev_ctx) const override; + + private: + // sub_net_op_[0]: subnet_t + // sub_net_op_[1]: subnet_f + std::vector> sub_net_op_; + + // index_[0]: True_index; + // index_[1]: False_index; + mutable std::vector> index_; +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/cos_sim_op.cc b/paddle/operators/cos_sim_op.cc index 253b17d8a1b88eccc58fc458ae8274d2bbd1c323..72c446493684246959656dc048e7f0e761665423 100644 --- a/paddle/operators/cos_sim_op.cc +++ b/paddle/operators/cos_sim_op.cc @@ -26,8 +26,16 @@ class CosSimOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { // notnull check - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) must not be null."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) must not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("XNorm"), + "Output(XNorm) of CosSimOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("YNorm"), + "Output(YNorm) of CosSimOp should not be null."); // shape check auto x_dims = ctx.Input("X")->dims(); diff --git a/paddle/operators/cos_sim_op.h b/paddle/operators/cos_sim_op.h index 318b63f3707cf77755de773a39b00aa30d2296d3..bcf6f758cae561a2e22f5be6c7a242647ef1c144 100644 --- a/paddle/operators/cos_sim_op.h +++ b/paddle/operators/cos_sim_op.h @@ -56,7 +56,7 @@ class CosSimKernel : public framework::OpKernel { x_norm.device(place) = x.square().sum(row_along).sqrt(); y_norm.device(place) = y.square().sum(row_along).sqrt(); if (rows_x == rows_y) { - auto xy = (x * y).sum(Eigen::array({1})); + auto xy = (x * y).sum(Eigen::array({{1}})); z.device(place) = xy / x_norm / y_norm; } else { Eigen::DSizes bcast(rows_x, 1); @@ -134,7 +134,7 @@ class CosSimGradKernel : public framework::OpKernel { out_grad_y->mutable_data(context.GetPlace()); auto dy = EigenMatrix::Reshape(*out_grad_y, 1); auto grad = x / norm_prod_bcast - z_bcast * y_bcast / y_snorm_bcast; - dy.device(place) = (dz_bcast * grad).sum(Eigen::array({0})); + dy.device(place) = (dz_bcast * grad).sum(Eigen::array({{0}})); } } } diff --git a/paddle/operators/elementwise_mul_op.cc b/paddle/operators/elementwise_mul_op.cc index e37c582adbe5b9e728f683d97cc51063ce80c3a2..ee6e975b443691bf71cec904565ced20406f3fba 100644 --- a/paddle/operators/elementwise_mul_op.cc +++ b/paddle/operators/elementwise_mul_op.cc @@ -25,8 +25,14 @@ class ElementWiseMulOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) should not be null"); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), "Input(Y) should not be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ElementWiseMulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of ElementWiseMulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of ElementWiseMulOp should not be null."); + auto x_dim = ctx.Input("X")->dims(); auto y_dim = ctx.Input("Y")->dims(); PADDLE_ENFORCE_GE(x_dim.size(), y_dim.size(), diff --git a/paddle/operators/elementwise_mul_op.h b/paddle/operators/elementwise_mul_op.h index e9ed6791799240039f9af42c1a4339be7126ee65..6d58da580b81b9e0a8ae170eec1a73638b190df8 100644 --- a/paddle/operators/elementwise_mul_op.h +++ b/paddle/operators/elementwise_mul_op.h @@ -13,10 +13,8 @@ limitations under the License. */ #pragma once -#include #include "paddle/framework/eigen.h" #include "paddle/framework/op_registry.h" -#include "paddle/operators/math/math_function.h" namespace paddle { namespace operators { diff --git a/paddle/operators/fill_zeros_like_op.cc b/paddle/operators/fill_zeros_like_op.cc index 0c9734892aac216709d380ec66acadf792761b14..ba7857cc65f6860a6156674c6addc2bfdce21a99 100644 --- a/paddle/operators/fill_zeros_like_op.cc +++ b/paddle/operators/fill_zeros_like_op.cc @@ -23,6 +23,13 @@ class FillZerosLikeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Src"), + "Input(Src) of FillZerosLikeOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Dst"), + "Output(Dst) of FillZerosLikeOp should not be null."); + ctx.Output("Dst")->Resize( ctx.Input("Src")->dims()); } diff --git a/paddle/operators/gather_op.cc b/paddle/operators/gather_op.cc index 8883d6d5fed2d8900364cf713a1e8d8b290ef83e..d445b61c1657356f2cdcf1e98d756607de2bd042 100644 --- a/paddle/operators/gather_op.cc +++ b/paddle/operators/gather_op.cc @@ -24,6 +24,13 @@ class GatherOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of GatherOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Index"), + "Input(Index) of GatherOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of GatherOp should not be null."); + int batch_size = ctx.Input("Index")->dims()[0]; PADDLE_ENFORCE_GE(batch_size, 0, "Batch size must be >0"); framework::DDim output_dims(ctx.Input("X")->dims()); diff --git a/paddle/operators/gaussian_random_op.cc b/paddle/operators/gaussian_random_op.cc index 25b0776a37488e876b0cc88aa3f2aa68e33fb270..c0e161bbc0c5486eb10408e43e6388f1b287abf8 100644 --- a/paddle/operators/gaussian_random_op.cc +++ b/paddle/operators/gaussian_random_op.cc @@ -43,8 +43,12 @@ class GaussianRandomOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext& context) const override { - auto* tensor = context.Output("Out"); + void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of GaussianRandomOp should not be null."); + + auto* tensor = ctx.Output("Out"); auto dims = Attr>("dims"); std::vector temp; temp.reserve(dims.size()); diff --git a/paddle/operators/identity_op.cc b/paddle/operators/identity_op.cc index 10bebe8fb86fa4d9336e34012140d8efad14704d..5ab8c0fadc5658b0b511e59304af152c63c17098 100644 --- a/paddle/operators/identity_op.cc +++ b/paddle/operators/identity_op.cc @@ -42,6 +42,11 @@ class IdentityOp : public NetOp { const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) : NetOp(type, inputs, outputs, attrs) { + PADDLE_ENFORCE_NE(Input("X"), framework::kEmptyVarName, + "Input(X) of IdentityOp should not be null."); + PADDLE_ENFORCE_NE(Output("Out"), framework::kEmptyVarName, + "Output(Out) of IdentityOp should not be null."); + AppendOp(framework::OpRegistry::CreateOp( "scale", {{"X", {Input("X")}}}, {{"Out", {Output("Y")}}}, {{"scale", static_cast(1)}})); diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc index b3d15f1ec99813d242c86c99faa7385795eef3b1..07f6dfabca5879e3de6004e59d2e87f7fa68d66c 100644 --- a/paddle/operators/lookup_table_op.cc +++ b/paddle/operators/lookup_table_op.cc @@ -22,10 +22,17 @@ class LookupTableOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; protected: - void InferShape(const framework::InferShapeContext &context) const override { - auto table_t = context.Input("W"); - auto ids_t = context.Input("Ids"); - auto output_t = context.Output("Out"); + void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("W"), + "Input(W) of LookupTableOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Ids"), + "Input(Ids) of LookupTableOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of LookupTableOp should not be null."); + + auto table_t = ctx.Input("W"); + auto ids_t = ctx.Input("Ids"); + auto output_t = ctx.Output("Out"); output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); } diff --git a/paddle/operators/mean_op.cc b/paddle/operators/mean_op.cc index 3e523d31b682d70825d50c2a57b6e98cbf29dcd3..7d7eeb59a23435036dc33c1e4fe6dd1c4a1a2f62 100644 --- a/paddle/operators/mean_op.cc +++ b/paddle/operators/mean_op.cc @@ -24,7 +24,9 @@ class MeanOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of MeanOp must be initialized."); + "Input(X) of MeanOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MeanOp should not be null."); ctx.Output("Out")->Resize({1}); } }; diff --git a/paddle/operators/minus_op.cc b/paddle/operators/minus_op.cc index 158c6045fe99c86771dd56047060a1387844b800..a97bbecdca1779df330d1053cf359bb658aa75c2 100644 --- a/paddle/operators/minus_op.cc +++ b/paddle/operators/minus_op.cc @@ -27,6 +27,13 @@ class MinusOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of MinusOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of MinusOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MinusOp should not be null."); + auto *left_tensor = ctx.Input("X"); auto *right_tensor = ctx.Input("Y"); @@ -77,8 +84,6 @@ class MinusGradOp : public NetOp { } // namespace operators } // namespace paddle -USE_OP(scale); -USE_NO_KERNEL_OP(identity); namespace ops = paddle::operators; REGISTER_OP(minus, ops::MinusOp, ops::MinusOpMaker, minus_grad, ops::MinusGradOp); diff --git a/paddle/operators/mul_op.cc b/paddle/operators/mul_op.cc index 015e13de9a09bcd1931ccf91413b6a3f484f82bb..b6d320b415e02549e85cb36ab517b0b5433887d5 100644 --- a/paddle/operators/mul_op.cc +++ b/paddle/operators/mul_op.cc @@ -26,6 +26,13 @@ class MulOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of MulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), + "Input(Y) of MulOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of MulOp should not be null."); + auto x_dims = ctx.Input("X")->dims(); auto y_dims = ctx.Input("Y")->dims(); int x_num_col_dims = Attr("x_num_col_dims"); diff --git a/paddle/operators/onehot_cross_entropy_op.cc b/paddle/operators/onehot_cross_entropy_op.cc index a9baada1cd4cd3af793bf1b0af7b029417e62b08..f38be3549f3c5d2443f61739fc32cdca74197649 100644 --- a/paddle/operators/onehot_cross_entropy_op.cc +++ b/paddle/operators/onehot_cross_entropy_op.cc @@ -23,6 +23,16 @@ class OnehotCrossEntropyOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), + "Input(X) of OnehotCrossEntropyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("label"), + "Input(label) of OnehotCrossEntropyOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Y"), + "Output(Y) of OnehotCrossEntropyOp should not be null."); + auto *X = ctx.Input("X"); auto *label = ctx.Input("label"); diff --git a/paddle/operators/pad_op.cc b/paddle/operators/pad_op.cc index 6cf7bd6f35b7592c41983efd75c1628043070687..a0b1c6b631d97a40d774f7d2ff9550fda9c32db4 100644 --- a/paddle/operators/pad_op.cc +++ b/paddle/operators/pad_op.cc @@ -25,6 +25,11 @@ class PadOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of PadOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of PadOp should not be null."); + auto x_dim = ctx.Input("X")->dims(); auto paddings = Attr>("paddings"); PADDLE_ENFORCE_EQ(x_dim.size() * 2, int64_t(paddings.size()), diff --git a/paddle/operators/reshape_op.cc b/paddle/operators/reshape_op.cc index d2817020921dfd3c044922de6a0f2ae0307936bd..0d05e344148c68f5625dd819ec59c5991892e4ce 100644 --- a/paddle/operators/reshape_op.cc +++ b/paddle/operators/reshape_op.cc @@ -28,7 +28,11 @@ class ReshapeOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { // input check - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), "Input(X) shouldn't be null"); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ReshapeOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ReshapeOp should not be null."); + auto shape = ctx.Attr>("shape"); PADDLE_ENFORCE(shape.size() > 0, "Attr(shape) shouldn't be empty."); for (auto dim : shape) { diff --git a/paddle/operators/rowwise_add_op.cc b/paddle/operators/rowwise_add_op.cc index c6101685a3205a7a7459347ea5b0cc8487656550..2a3fd3be941d91aaa6b014df91d3025f07767577 100644 --- a/paddle/operators/rowwise_add_op.cc +++ b/paddle/operators/rowwise_add_op.cc @@ -25,6 +25,13 @@ class RowwiseAddOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of RowwiseAddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("b"), + "Input(b) of RowwiseAddOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of RowwiseAddOp should not be null."); + auto x_dims = ctx.Input("X")->dims(); auto b_dims = ctx.Input("b")->dims(); PADDLE_ENFORCE_GT( diff --git a/paddle/operators/scale_op.cc b/paddle/operators/scale_op.cc index 35e6b70ba94a645da2b99093b2354acfb0ef2771..d1f42e8662537d35e17429f9d436fdc0e5a1dc11 100644 --- a/paddle/operators/scale_op.cc +++ b/paddle/operators/scale_op.cc @@ -27,6 +27,11 @@ class ScaleOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of ScaleOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ScaleOp should not be null."); + auto *in = ctx.Input("X"); auto *out = ctx.Output("Out"); out->Resize(in->dims()); diff --git a/paddle/operators/scatter_op.cc b/paddle/operators/scatter_op.cc index 0f7510983e2e34485110719c92d26cbc78cd850c..8820262732327306f4f807702751708bd1e2aa36 100644 --- a/paddle/operators/scatter_op.cc +++ b/paddle/operators/scatter_op.cc @@ -24,6 +24,15 @@ class ScatterOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Ref"), + "Input(Ref) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Index"), + "Input(Index) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Updates"), + "Input(Updates) of ScatterOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of ScatterOp should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("Index")->dims().size(), 1, "Update Index should be 1-D."); PADDLE_ENFORCE_EQ(ctx.Input("Ref")->dims().size(), diff --git a/paddle/operators/sequence_avg_pool_op.cc b/paddle/operators/sequence_avg_pool_op.cc index c15a5833deba2e198f6cb724bda7e3306c56e461..eb3e37655bc7eae1a3cf1348434e33a415947cad 100644 --- a/paddle/operators/sequence_avg_pool_op.cc +++ b/paddle/operators/sequence_avg_pool_op.cc @@ -23,9 +23,12 @@ class SequenceAvgPoolOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of SequenceAvgPoolOp" - "must be initialized."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), "Input(X) of SequenceAvgPoolOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of SequenceAvgPoolOp should not be null."); + auto* x = ctx.Input("X"); auto dims = x->dims(); auto lod = x->lod(); diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc index 7997bf690710b3675f4014790db8cc7fc06946d3..1232e64c7f0132b9ea19b3d7e1ebe9531e1e25a5 100644 --- a/paddle/operators/sgd_op.cc +++ b/paddle/operators/sgd_op.cc @@ -23,6 +23,13 @@ class SGDOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("param"), + "Input(param) of SGDOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("grad"), + "Input(grad) of SGDOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("param_out"), + "Output(param_out) of SGDOp should not be null."); + PADDLE_ENFORCE_EQ(ctx.Input("param")->dims(), ctx.Input("grad")->dims(), "Two input of SGD Op's dimension must be same."); diff --git a/paddle/operators/sigmoid_op.cc b/paddle/operators/sigmoid_op.cc index de6a1ba77382306923ee238e5cf309c791f9f216..992b19965e0ca9ce7dba1b8b3c5b7780af06eb45 100644 --- a/paddle/operators/sigmoid_op.cc +++ b/paddle/operators/sigmoid_op.cc @@ -23,6 +23,11 @@ class SigmoidOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of SigmoidOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), + "Output(Y) of SigmoidOp should not be null."); + ctx.Output("Y")->Resize( ctx.Input("X")->dims()); } diff --git a/paddle/operators/softmax_op.cc b/paddle/operators/softmax_op.cc index 239d3d141e1076a0a6a943f340311b17aa6f542a..c67eb028c882ed82ca4e6a4dd70cdea9f69cdc24 100644 --- a/paddle/operators/softmax_op.cc +++ b/paddle/operators/softmax_op.cc @@ -23,6 +23,11 @@ class SoftmaxOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), + "Input(X) of SoftmaxOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Y"), + "Output(Y) of SoftmaxOp should not be null."); + PADDLE_ENFORCE(ctx.Input("X")->dims().size() == 2UL, "The input of softmax op must be a matrix."); ctx.Output("Y")->Resize( diff --git a/paddle/operators/squared_l2_distance_op.cc b/paddle/operators/squared_l2_distance_op.cc index ebe5bd352e99e298fb86355730feed77b236d2bd..39f4305877de20d451bc35fe698a0eabf9758d57 100644 --- a/paddle/operators/squared_l2_distance_op.cc +++ b/paddle/operators/squared_l2_distance_op.cc @@ -23,12 +23,18 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of SquaredL2DistanceOp " - "must be initialized."); - PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("Y"), - "Target of SquaredL2DistanceOp " - "must be initialized."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("X"), + "Input(X) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.InputVar("Y"), + "Input(Y) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("sub_result"), + "Output(sub_result) of SquaredL2DistanceOp should not be null."); + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of SquaredL2DistanceOp should not be null."); auto* x = ctx.Input("X"); auto x_dims = x->dims(); diff --git a/paddle/operators/sum_op.cc b/paddle/operators/sum_op.cc index 7170e7256c206d338ef1f6f94d5d1889ca92a3de..41e05c27f9029b2664685d3979fadcfd2bf6dbce 100644 --- a/paddle/operators/sum_op.cc +++ b/paddle/operators/sum_op.cc @@ -22,6 +22,11 @@ class SumOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { + PADDLE_ENFORCE(!ctx.MultiInputVar("X").empty(), + "Input(X) of SumOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of SumOp should not be null."); + auto ins = ctx.MultiInput("X"); auto *out = ctx.Output("Out"); int N = ins.size(); diff --git a/paddle/operators/top_k_op.cc b/paddle/operators/top_k_op.cc index ff0e77a344ede7709a805d7dca4397eb49fa300c..169b815feffd86f9ff04c129ccc997230ce03a8c 100644 --- a/paddle/operators/top_k_op.cc +++ b/paddle/operators/top_k_op.cc @@ -24,7 +24,12 @@ class TopkOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext &ctx) const override { PADDLE_ENFORCE_NOT_NULL(ctx.InputVar("X"), - "Input of TopkOP must be initialized."); + "Input(X) of TopkOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Out"), + "Output(Out) of TopkOp should not be null."); + PADDLE_ENFORCE_NOT_NULL(ctx.OutputVar("Indices"), + "Output(Indices) of TopkOp should not be null."); + auto *input = ctx.Input("X"); const int k = static_cast(ctx.Attr("k")); diff --git a/paddle/operators/uniform_random_op.cc b/paddle/operators/uniform_random_op.cc index ed7973693619e7765643dda824100afd82616470..184bcbc29c0d26a214345506f126f9cc0d406b07 100644 --- a/paddle/operators/uniform_random_op.cc +++ b/paddle/operators/uniform_random_op.cc @@ -48,6 +48,10 @@ class UniformRandomOp : public framework::OperatorWithKernel { protected: void InferShape(const framework::InferShapeContext& ctx) const override { + PADDLE_ENFORCE_NOT_NULL( + ctx.OutputVar("Out"), + "Output(Out) of UniformRandomOp should not be null."); + PADDLE_ENFORCE(Attr("min") < Attr("max"), "uniform_random's min must less then max"); auto* tensor = ctx.Output("Out"); diff --git a/paddle/platform/CMakeLists.txt b/paddle/platform/CMakeLists.txt index 17bdac8749e31565b119b2cb84aed199fac0f441..8b605e51c3f4ea38fc358ce054bb36fcc82063c4 100644 --- a/paddle/platform/CMakeLists.txt +++ b/paddle/platform/CMakeLists.txt @@ -24,3 +24,4 @@ cc_library(device_context SRCS device_context.cc DEPS memory buddy_allocator nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_info) nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda) +nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place) diff --git a/paddle/platform/details/device_ptr_cast.h b/paddle/platform/details/device_ptr_cast.h new file mode 100644 index 0000000000000000000000000000000000000000..4015491fcdc3554029aa771ab7da1b2f3424321f --- /dev/null +++ b/paddle/platform/details/device_ptr_cast.h @@ -0,0 +1,56 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#ifndef __NVCC__ +#error device_ptr_cast must be include by .cu file +#endif + +#include + +namespace paddle { +namespace platform { +namespace details { +template +struct DevicePtrCast; + +template +struct DevicePtrCast { + using ELEM = typename std::remove_pointer::type; + using RTYPE = thrust::device_ptr; + + inline thrust::device_ptr operator()(ELEM* ele) const { + return thrust::device_pointer_cast(ele); + } +}; + +template +struct DevicePtrCast { + using RTYPE = T; + inline RTYPE operator()(RTYPE it) const { return it; } +}; + +// Cast T to thrust::device_ptr if T is a pointer. +// Otherwise, e.g., T is a iterator, return T itself. +template +auto DevPtrCast(T t) -> + typename DevicePtrCast::value>::RTYPE { + DevicePtrCast::value> cast; + return cast(t); +} + +} // namespace details +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/enforce.h b/paddle/platform/enforce.h index 64fcbd93b6c4d5d9b36f2636c3ef4f7327f08d25..df5f71ed760952ed042d7ffa40a4319a73fb93bf 100644 --- a/paddle/platform/enforce.h +++ b/paddle/platform/enforce.h @@ -25,6 +25,10 @@ limitations under the License. */ #include "paddle/string/printf.h" #include "paddle/string/to_string.h" +#ifdef __GNUC__ +#include // for __cxa_demangle +#endif + #ifndef PADDLE_ONLY_CPU #include "paddle/platform/dynload/cublas.h" @@ -42,6 +46,19 @@ limitations under the License. */ namespace paddle { namespace platform { +namespace { +#ifdef __GNUC__ +inline std::string demangle(std::string name) { + int status = -4; // some arbitrary value to eliminate the compiler warning + std::unique_ptr res{ + abi::__cxa_demangle(name.c_str(), NULL, NULL, &status), std::free}; + return (status == 0) ? res.get() : name; +} +#else +inline std::string demangle(std::string name) { return name; } +#endif +} + struct EnforceNotMet : public std::exception { std::exception_ptr exp_; std::string err_str_; @@ -61,8 +78,8 @@ struct EnforceNotMet : public std::exception { Dl_info info; for (int i = 0; i < size; ++i) { - if (dladdr(call_stack[i], &info)) { - auto demangled = info.dli_sname; + if (dladdr(call_stack[i], &info) && info.dli_sname) { + auto demangled = demangle(info.dli_sname); auto addr_offset = static_cast(call_stack[i]) - static_cast(info.dli_saddr); sout << string::Sprintf("%-3d %*0p %s + %zd\n", i, diff --git a/paddle/platform/transform.h b/paddle/platform/transform.h new file mode 100644 index 0000000000000000000000000000000000000000..3ee4acd29660f201d318ce6d39baa6f3999ae274 --- /dev/null +++ b/paddle/platform/transform.h @@ -0,0 +1,66 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/platform/enforce.h" +#include "paddle/platform/hostdevice.h" +#include "paddle/platform/place.h" + +#include +#include +#ifdef __NVCC__ +#include +#include "paddle/platform/details/device_ptr_cast.h" +#endif + +namespace paddle { +namespace platform { +// Transform on host or device. It provides the same API in std library. +template +void Transform(Place place, InputIter first, InputIter last, OutputIter result, + UnaryOperation op) { + if (is_cpu_place(place)) { + std::transform(first, last, result, op); + } else { +#ifdef __NVCC__ + using namespace details; + thrust::transform(DevPtrCast(first), DevPtrCast(last), DevPtrCast(result), + op); +#else + PADDLE_THROW("Do not invoke `Transform` in .cc file"); +#endif + } +} + +template +void Transform(Place place, InputIter1 first1, InputIter1 last1, + InputIter2 first2, OutputIter result, BinaryOperation op) { + if (is_cpu_place(place)) { + std::transform(first1, last1, first2, result, op); + } else { +#ifdef __NVCC__ + using namespace details; + thrust::transform(DevPtrCast(first1), DevPtrCast(last1), DevPtrCast(first2), + DevPtrCast(result), op); +#else + PADDLE_THROW("Do not invoke `Transform` in .cc file"); +#endif + } +}; + +} // namespace platform +} // namespace paddle diff --git a/paddle/platform/transform_test.cu b/paddle/platform/transform_test.cu new file mode 100644 index 0000000000000000000000000000000000000000..600fed8f45077a6fee91f295aa854153c9cf9c01 --- /dev/null +++ b/paddle/platform/transform_test.cu @@ -0,0 +1,84 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include "paddle/memory/memcpy.h" +#include "paddle/memory/memory.h" +#include "paddle/platform/transform.h" + +template +class Scale { + public: + explicit Scale(const T& scale) : scale_(scale) {} + + HOSTDEVICE T operator()(const T& a) const { return a * scale_; } + + private: + T scale_; +}; + +template +class Multiply { + public: + HOSTDEVICE T operator()(const T& a, const T& b) const { return a * b; } +}; + +TEST(Transform, CPUUnary) { + using namespace paddle::platform; + float buf[4] = {0.1, 0.2, 0.3, 0.4}; + Transform(CPUPlace(), buf, buf + 4, buf, Scale(10)); + for (int i = 0; i < 4; ++i) { + ASSERT_NEAR(buf[i], static_cast(i + 1), 1e-5); + } +} + +TEST(Transform, GPUUnary) { + using namespace paddle::platform; + using namespace paddle::memory; + GPUPlace gpu0(0); + float cpu_buf[4] = {0.1, 0.2, 0.3, 0.4}; + float* gpu_buf = static_cast(Alloc(gpu0, sizeof(float) * 4)); + Copy(gpu0, gpu_buf, CPUPlace(), cpu_buf, sizeof(cpu_buf)); + Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, Scale(10)); + Copy(CPUPlace(), cpu_buf, gpu0, gpu_buf, sizeof(cpu_buf)); + Free(gpu0, gpu_buf); + for (int i = 0; i < 4; ++i) { + ASSERT_NEAR(cpu_buf[i], static_cast(i + 1), 1e-5); + } +} + +TEST(Transform, CPUBinary) { + using namespace paddle::platform; + using namespace paddle::memory; + int buf[4] = {1, 2, 3, 4}; + Transform(CPUPlace(), buf, buf + 4, buf, buf, Multiply()); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ((i + 1) * (i + 1), buf[i]); + } +} + +TEST(Transform, GPUBinary) { + using namespace paddle::platform; + using namespace paddle::memory; + int buf[4] = {1, 2, 3, 4}; + GPUPlace gpu0(0); + int* gpu_buf = static_cast(Alloc(gpu0, sizeof(buf))); + Copy(gpu0, gpu_buf, CPUPlace(), buf, sizeof(buf)); + Transform(gpu0, gpu_buf, gpu_buf + 4, gpu_buf, gpu_buf, Multiply()); + Copy(CPUPlace(), buf, gpu0, gpu_buf, sizeof(buf)); + Free(gpu0, gpu_buf); + for (int i = 0; i < 4; ++i) { + ASSERT_EQ((i + 1) * (i + 1), buf[i]); + } +} \ No newline at end of file diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index a7a38339fb2c8689778b0a86d3713f67e1447a80..c7009a604f60cda11434ad33b6c7d7caee1befdd 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -19,6 +19,7 @@ limitations under the License. */ #include "paddle/framework/backward.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/cond_op.h" #include "paddle/operators/net_op.h" #include "paddle/operators/recurrent_op.h" #include "paddle/platform/enforce.h" @@ -288,6 +289,28 @@ All parameter, weight, gradient are variables in Paddle. [](operators::RecurrentOp &self, const operators::NetOp &net) -> void { self.set_stepnet(net.Clone()); }); + // cond_op + py::class_(m, "CondOp") + .def_static("create", + [](py::bytes protobin) -> operators::CondOp * { + OpDesc desc; + PADDLE_ENFORCE(desc.ParsePartialFromString(protobin), + "Cannot parse user input to OpDesc"); + PADDLE_ENFORCE(desc.IsInitialized(), + "User OpDesc is not initialized, reason %s", + desc.InitializationErrorString()); + auto cond_op = OpRegistry::CreateOp(desc); + return static_cast(cond_op.release()); + }) + .def("set_truenet", + [](operators::CondOp &self, const operators::NetOp &net) -> void { + self.set_truenet(net.Clone()); + }) + .def("set_falsenet", + [](operators::CondOp &self, const operators::NetOp &net) -> void { + self.set_falsenet(net.Clone()); + }); + m.def("unique_integer", UniqueIntegerGenerator); m.def("is_compile_gpu", IsCompileGPU); diff --git a/python/paddle/v2/framework/op.py b/python/paddle/v2/framework/op.py index 15e0d125c495fbc0688d8dc4e66881cb9ab95a90..6cca41e43b38b8cccb65ff9b347ef226dddecd4d 100644 --- a/python/paddle/v2/framework/op.py +++ b/python/paddle/v2/framework/op.py @@ -215,5 +215,27 @@ class __RecurrentOp__(object): return core.RecurrentOp.create(proto.SerializeToString()) +class __CondOp__(object): + __proto__ = None + type = "cond" + + def __init__(self): + # cache recurrent_op's proto + if self.__proto__ is None: + for op_proto in get_all_op_protos(): + if op_proto.type == self.type: + self.__proto__ = op_proto + + def __call__(self, *args, **kwargs): + if self.type not in args and "type" not in kwargs: + kwargs["type"] = self.type + # create proto + create_method = OpDescCreationMethod(self.__proto__) + proto = create_method(*args, **kwargs) + # create condop + return core.CondOp.create(proto.SerializeToString()) + + Operator = OperatorFactory() # The default global factory RecurrentOp = __RecurrentOp__() +CondOp = __CondOp__() diff --git a/python/paddle/v2/framework/tests/test_add_two_op.py b/python/paddle/v2/framework/tests/test_add_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_add_two_op.py rename to python/paddle/v2/framework/tests/test_add_op.py diff --git a/python/paddle/v2/framework/tests/test_cond_op.py b/python/paddle/v2/framework/tests/test_cond_op.py new file mode 100644 index 0000000000000000000000000000000000000000..37177ae0b2482517c4183969c8ef0670f2b3de89 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_cond_op.py @@ -0,0 +1,116 @@ +import logging +import paddle.v2.framework.core as core +import unittest +import numpy as np +from paddle.v2.framework.op import Operator, CondOp + + +class PySimpleCond(object): + ''' + A simple implementation of dynamic if-else based on numpy + ''' + + def __init__(self): + array = [1] * 10 + for i in range(1, 10, 2): + array[i] = 0 + self.cond = np.array(array) + self.x = np.ones(shape=(10, 1)) + + def forward(self): + self.index_t = np.where(self.cond == 1) + self.index_f = np.where(self.cond == 0) + y_t = self.x[self.index_t] + y_f = self.x[self.index_f] + y_t = y_t * 2. + y_f = y_f * (-2.) + output = np.zeros(shape=(10, 1)) + output[self.index_t] = y_t + output[self.index_f] = y_f + return output + + +class PySimpleCondTest(unittest.TestCase): + def setUp(self): + self.condnn = PySimpleCond() + + def test_forward(self): + output = self.condnn.forward() + + +def create_tensor(scope, name, shape, np_data): + tensor = scope.new_var(name).get_tensor() + tensor.set_dims(shape) + tensor.set(np_data, core.CPUPlace()) + return tensor + + +class TestCondOp(unittest.TestCase): + ''' + Test CondOp + + equation: + cond = [True, False, True, False, ...] + y[index_t] = x[index_t] * 2. + y[index_f] = x[index_f] * -2. + outputs: + y + ''' + + def setUp(self): + self.py_cond = PySimpleCond() + + def forward(self): + self.scope = core.Scope() + self.create_global_variables() + self.create_cond_op() + self.create_sub_net() + ctx = core.DeviceContext.create(core.CPUPlace()) + self.condop.infer_shape(self.scope) + self.condop.run(self.scope, ctx) + return np.array(self.scope.find_var("Out").get_tensor()) + + def create_global_variables(self): + x_np_data = self.py_cond.x + create_tensor(self.scope, "X", [10, 1], x_np_data) + cond_np_data = self.py_cond.cond.astype("int32") + create_tensor(self.scope, "cond", [10, 1], cond_np_data) + self.scope.new_var("SubScopes") + self.scope.new_var("IndexTensors") + self.scope.new_var("Out") + + def create_cond_op(self): + self.condop = CondOp( + Cond="cond", + Xs=["X"], + Outs=["Out"], + SubScopes="SubScopes", + IndexTensors="IndexTensors") + + def create_sub_net(self): + truenet = core.Net.create() + scale_op_t = Operator("scale", X='X', Out='Out', scale=2.) + truenet.append_op(scale_op_t) + truenet.complete_add_op(True) + self.condop.set_truenet(truenet) + + falsenet = core.Net.create() + scale_op_t = Operator("scale", X='X', Out='Out', scale=-2.) + falsenet.append_op(scale_op_t) + falsenet.complete_add_op(True) + self.condop.set_falsenet(falsenet) + + def test_forward(self): + print 'test cond op forward' + pd_output = self.forward() + py_output = self.py_cond.forward() + print 'pd_output', pd_output + print + print 'py_output', py_output + self.assertEqual(pd_output.shape, py_output.shape) + print 'test passed' + return 0 + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_gaussian_random_op.py b/python/paddle/v2/framework/tests/test_gaussian_random_op.py index 1f9e4db783c9907a22db72c8a6ff06c7ca0735da..1888ee28f92c66496ce756d8a4a33d3e9ba57d7b 100644 --- a/python/paddle/v2/framework/tests/test_gaussian_random_op.py +++ b/python/paddle/v2/framework/tests/test_gaussian_random_op.py @@ -4,7 +4,7 @@ from paddle.v2.framework.op import Operator import numpy -class GaussianRandomTest(unittest.TestCase): +class TestGaussianRandomOp(unittest.TestCase): def test_cpu(self): self.gaussian_random_test(place=core.CPUPlace()) diff --git a/python/paddle/v2/framework/tests/test_identity_op.py b/python/paddle/v2/framework/tests/test_identity_op.py new file mode 100644 index 0000000000000000000000000000000000000000..26cec1fcc3ad003281c9c41571d475b55bd30026 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_identity_op.py @@ -0,0 +1,20 @@ +import unittest +import numpy as np +from op_test import OpTest + + +class TestIdentityOp(OpTest): + def setUp(self): + self.op_type = "identity" + self.inputs = {'X': np.random.random((10, 10)).astype("float32")} + self.outputs = {'Y': self.inputs['X']} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(['X'], 'Y') + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table_op.py similarity index 100% rename from python/paddle/v2/framework/tests/test_lookup_table.py rename to python/paddle/v2/framework/tests/test_lookup_table_op.py diff --git a/python/paddle/v2/framework/tests/test_minus_op.py b/python/paddle/v2/framework/tests/test_minus_op.py index dea797a1fea34265d0a32e097f413f421abf2521..c56d7cb548706880dd482bad750f2989c0e9a710 100644 --- a/python/paddle/v2/framework/tests/test_minus_op.py +++ b/python/paddle/v2/framework/tests/test_minus_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class MinusOpTest(OpTest): +class TestMinusOp(OpTest): def setUp(self): self.op_type = "minus" self.inputs = { diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py similarity index 95% rename from python/paddle/v2/framework/tests/test_cross_entropy_op.py rename to python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py index 253e7b8a24465da63a7eacd7983eb831251e6230..fd3cbdb80374865ccf113768856096bf49dce643 100644 --- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py +++ b/python/paddle/v2/framework/tests/test_onehot_cross_entropy_op.py @@ -3,7 +3,7 @@ import numpy from op_test import OpTest -class TestCrossEntropy(OpTest): +class TestOnehotCrossEntropyOp(OpTest): def setUp(self): self.op_type = "onehot_cross_entropy" batch_size = 30 diff --git a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py b/python/paddle/v2/framework/tests/test_scale_op.py similarity index 57% rename from python/paddle/v2/framework/tests/test_scale_and_identity_op.py rename to python/paddle/v2/framework/tests/test_scale_op.py index 42b4d20ac9cfb88b9532e5cab4f9b06f3a9832df..2ea1e185470280730ae8c8c0ea9568bbeb43eaf5 100644 --- a/python/paddle/v2/framework/tests/test_scale_and_identity_op.py +++ b/python/paddle/v2/framework/tests/test_scale_op.py @@ -3,20 +3,7 @@ import numpy as np from op_test import OpTest -class IdentityTest(OpTest): - def setUp(self): - self.op_type = "identity" - self.inputs = {'X': np.random.random((10, 10)).astype("float32")} - self.outputs = {'Y': self.inputs['X']} - - def test_check_output(self): - self.check_output() - - def test_check_grad(self): - self.check_grad(['X'], 'Y') - - -class ScaleTest(OpTest): +class TestScaleOp(OpTest): def setUp(self): self.op_type = "scale" self.inputs = {'X': np.random.random((10, 10)).astype("float32")} diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py index 557cf15ace63e336462c7dcdbbc10f30aeedc6f4..64e54d1500c1bc134cc1efe33d41a16dbc08f2d4 100644 --- a/python/paddle/v2/framework/tests/test_sgd_op.py +++ b/python/paddle/v2/framework/tests/test_sgd_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class TestSGD(OpTest): +class TestSGDOp(OpTest): def setUp(self): self.op_type = "sgd" w = np.random.random((102, 105)).astype("float32") diff --git a/python/paddle/v2/framework/tests/test_sigmoid_op.py b/python/paddle/v2/framework/tests/test_sigmoid_op.py index 2316e49eff7bb1cdb53acb3889a6ef05060b59f3..d65d887db4af58c40e4e78fdbfd8e8ee668b7ee3 100644 --- a/python/paddle/v2/framework/tests/test_sigmoid_op.py +++ b/python/paddle/v2/framework/tests/test_sigmoid_op.py @@ -3,7 +3,7 @@ import numpy as np from op_test import OpTest -class TestSigmoid(OpTest): +class TestSigmoidOp(OpTest): def setUp(self): self.op_type = "sigmoid" self.inputs = { diff --git a/python/paddle/v2/framework/tests/test_top_k_op.py b/python/paddle/v2/framework/tests/test_top_k_op.py index cab799256d791889c295aa7f9048080f5caaf2dc..694f37d612d4c46e673dc894b05a0a446190732c 100644 --- a/python/paddle/v2/framework/tests/test_top_k_op.py +++ b/python/paddle/v2/framework/tests/test_top_k_op.py @@ -21,6 +21,9 @@ class TestTopkOp(OpTest): self.outputs = {'Out': output, 'Indices': indices} + def test_check_output(self): + self.check_output() + class TestTopkOp3d(OpTest): def setUp(self): @@ -42,6 +45,9 @@ class TestTopkOp3d(OpTest): self.outputs = {'Out': output, 'Indices': indices} + def test_check_output(self): + self.check_output() + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/v2/framework/tests/test_uniform_random_op.py b/python/paddle/v2/framework/tests/test_uniform_random_op.py index 76a5e36e56ab08230bdc2597d209fcf5d1d2acb0..9e8898fb5920defdfaa361bf45def7666a88beea 100644 --- a/python/paddle/v2/framework/tests/test_uniform_random_op.py +++ b/python/paddle/v2/framework/tests/test_uniform_random_op.py @@ -4,7 +4,7 @@ import paddle.v2.framework.core as core import numpy -class UniformRandomTest(unittest.TestCase): +class TestUniformRandomOp(unittest.TestCase): def test_uniform_random_cpu(self): self.uniform_random_test(place=core.CPUPlace())