diff --git a/go/master/client.go b/go/master/client.go index 62801b9b7fe85fe27147b12160f48d988623d547..f04cf50ce3cf765a79cbe555d3edb68f3dbb911e 100644 --- a/go/master/client.go +++ b/go/master/client.go @@ -63,13 +63,24 @@ func WithAddr(addr string) func(c *Client) error { // WithEtcd sets the client to use etcd for master discovery. func WithEtcd(endpoints []string, timeout time.Duration) func(*Client) error { return func(c *Client) error { - cli, err := clientv3.New(clientv3.Config{ - Endpoints: endpoints, - DialTimeout: timeout, - }) - if err != nil { + var cli *clientv3.Client + f := func() error { + var err error + cli, err = clientv3.New(clientv3.Config{ + Endpoints: endpoints, + DialTimeout: timeout, + }) return err } + for { + err := f() + if err != nil { + log.Warningln(err) + } else { + break + } + time.Sleep(time.Second) + } ch := make(chan string, 1) a, err := GetKey(cli, DefaultAddrPath, timeout) @@ -101,9 +112,6 @@ func NewClient(opts ...func(*Client) error) (*Client, error) { } } c.ch = make(chan record, c.bufSize) - // FIXME: connection is created asyncrosly in monitorMaster go routine, - // ensure the connection is ready for use before calling c.addClient. - time.Sleep(time.Second) return c, nil } diff --git a/paddle/cuda/include/hl_cuda_cudnn.h b/paddle/cuda/include/hl_cuda_cudnn.h index db18e4912b63ec18dcfff3ef3aaf0c7947e0af18..3f68c62de6d9b3aaadc9180d86159089dc728ea9 100644 --- a/paddle/cuda/include/hl_cuda_cudnn.h +++ b/paddle/cuda/include/hl_cuda_cudnn.h @@ -214,7 +214,8 @@ extern void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes); + size_t* bwdFilterLimitBytes, + bool useDilation); /** * @brief destroy filter descriptor. @@ -242,7 +243,9 @@ extern void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief reset convolution descriptor. @@ -262,7 +265,9 @@ extern void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width); + int stride_width, + int dilation_h = 1, + int dilation_w = 1); /** * @brief destroy convolution descriptor. diff --git a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h index abd0d6b09901a7cd124c245e359f9d38f52bda26..3afcc6fa85a4a6a03697663719b6ab685897b68b 100644 --- a/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h +++ b/paddle/cuda/include/stub/hl_cuda_cudnn_stub.h @@ -78,7 +78,9 @@ inline void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, hl_tensor_descriptor image, @@ -86,7 +88,9 @@ inline void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) {} + int stride_width, + int dilation_h, + int dilation_w) {} inline void hl_destroy_convolution_descriptor(hl_convolution_descriptor conv) {} @@ -99,7 +103,8 @@ inline void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) {} + size_t* bwdFilterLimitBytes, + bool useDilation) {} inline void hl_convolution_forward(hl_tensor_descriptor input, real* input_data, diff --git a/paddle/cuda/src/hl_cuda_cudnn.cc b/paddle/cuda/src/hl_cuda_cudnn.cc index 78642a17443b0b4d81defaa46579332ef20c71a1..f38ef692558b908ed65d2c84821bbb7c3b439742 100644 --- a/paddle/cuda/src/hl_cuda_cudnn.cc +++ b/paddle/cuda/src/hl_cuda_cudnn.cc @@ -201,7 +201,8 @@ void hl_conv_workspace(hl_tensor_descriptor input, int* convBwdDataAlgo, size_t* bwdDataLimitBytes, int* convBwdFilterAlgo, - size_t* bwdFilterLimitBytes) { + size_t* bwdFilterLimitBytes, + bool useDilation) { #if CUDNN_VERSION >= 4000 CHECK_NOTNULL(input); @@ -213,21 +214,60 @@ void hl_conv_workspace(hl_tensor_descriptor input, size_t memoryLimitBytes = (1LL << 20) * FLAGS_cudnn_conv_workspace_limit_in_mb; + // For dilation + int algo = 0; + // cudnn convolution forward configuration cudnnTensorDescriptor_t fwd_src_desc = GET_TENSOR_DESCRIPTOR(input); cudnnTensorDescriptor_t fwd_dest_desc = GET_TENSOR_DESCRIPTOR(output); cudnnFilterDescriptor_t fwd_filter_desc = GET_FILTER_DESCRIPTOR(filter); cudnnConvolutionDescriptor_t fwd_conv_desc = GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward data configuration + cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); + cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnConvolutionDescriptor_t bwd_data_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + // cudnn convolution backward filter configuration + cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); + cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); + cudnnConvolutionDescriptor_t bwd_filter_conv_desc = + GET_CONVOLUTION_DESCRIPTOR(conv); + cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( - t_resource.cudnn_handle, - fwd_src_desc, - fwd_filter_desc, - fwd_conv_desc, - fwd_dest_desc, - CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convFwdAlgo))); + if (useDilation) { + convFwdAlgo = &algo; + convBwdDataAlgo = &algo; + convBwdFilterAlgo = &algo; + } else { + CHECK_CUDNN(dynload::cudnnGetConvolutionForwardAlgorithm( + t_resource.cudnn_handle, + fwd_src_desc, + fwd_filter_desc, + fwd_conv_desc, + fwd_dest_desc, + CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convFwdAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( + t_resource.cudnn_handle, + bwd_data_filter_desc, + bwd_data_diff_desc, + bwd_data_conv_desc, + bwd_data_grad_desc, + CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdDataAlgo))); + CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( + t_resource.cudnn_handle, + bwd_filter_src_desc, + bwd_filter_diff_desc, + bwd_filter_conv_desc, + bwd_filter_grad_desc, + CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, + memoryLimitBytes, + reinterpret_cast(convBwdFilterAlgo))); + } CHECK_CUDNN(dynload::cudnnGetConvolutionForwardWorkspaceSize( t_resource.cudnn_handle, @@ -238,23 +278,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convFwdAlgo), fwdLimitBytes)); - // cudnn convolution backward data configuration - cudnnFilterDescriptor_t bwd_data_filter_desc = GET_FILTER_DESCRIPTOR(filter); - cudnnTensorDescriptor_t bwd_data_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnTensorDescriptor_t bwd_data_grad_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnConvolutionDescriptor_t bwd_data_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataAlgorithm( - t_resource.cudnn_handle, - bwd_data_filter_desc, - bwd_data_diff_desc, - bwd_data_conv_desc, - bwd_data_grad_desc, - CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdDataAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardDataWorkspaceSize( t_resource.cudnn_handle, bwd_data_filter_desc, @@ -264,23 +287,6 @@ void hl_conv_workspace(hl_tensor_descriptor input, static_cast(*convBwdDataAlgo), bwdDataLimitBytes)); - // cudnn convolution backward filter configuration - cudnnTensorDescriptor_t bwd_filter_src_desc = GET_TENSOR_DESCRIPTOR(input); - cudnnTensorDescriptor_t bwd_filter_diff_desc = GET_TENSOR_DESCRIPTOR(output); - cudnnConvolutionDescriptor_t bwd_filter_conv_desc = - GET_CONVOLUTION_DESCRIPTOR(conv); - cudnnFilterDescriptor_t bwd_filter_grad_desc = GET_FILTER_DESCRIPTOR(filter); - - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterAlgorithm( - t_resource.cudnn_handle, - bwd_filter_src_desc, - bwd_filter_diff_desc, - bwd_filter_conv_desc, - bwd_filter_grad_desc, - CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT, - memoryLimitBytes, - reinterpret_cast(convBwdFilterAlgo))); - CHECK_CUDNN(dynload::cudnnGetConvolutionBackwardFilterWorkspaceSize( t_resource.cudnn_handle, bwd_filter_src_desc, @@ -603,7 +609,9 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); cudnn_convolution_descriptor hl_conv = (cudnn_convolution_descriptor)malloc( @@ -625,18 +633,24 @@ void hl_create_convolution_descriptor(hl_convolution_descriptor* conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else + if (dilation_h > 1 || dilation_w > 1) { + LOG(FATAL) + << "Current cuDNN version does't support for dilation convolution. " + << "The dilation convolution requires cuDNN >= v6.0."; + } + CHECK_CUDNN(dynload::cudnnSetConvolution2dDescriptor(hl_conv->desc, padding_height, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif @@ -659,7 +673,9 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, int padding_height, int padding_width, int stride_height, - int stride_width) { + int stride_width, + int dilation_h, + int dilation_w) { CHECK_NOTNULL(conv); CHECK_NOTNULL(image); CHECK_NOTNULL(filter); @@ -678,8 +694,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode, data_type)); #else @@ -688,8 +704,8 @@ void hl_reset_convolution_descriptor(hl_convolution_descriptor conv, padding_width, stride_height, stride_width, - 1, - 1, + dilation_h, + dilation_w, mode)); #endif diff --git a/paddle/gserver/layers/ConvBaseLayer.cpp b/paddle/gserver/layers/ConvBaseLayer.cpp index e161d89c38a290000a2cbdb2905e56901ae4c144..a5328ef8343e1050352fc48530e041fb6ce12a8b 100644 --- a/paddle/gserver/layers/ConvBaseLayer.cpp +++ b/paddle/gserver/layers/ConvBaseLayer.cpp @@ -32,9 +32,11 @@ bool ConvBaseLayer::init(const LayerMap& layerMap, const ConvConfig& conf = inputConfig.conv_conf(); padding_.push_back(conf.padding()); stride_.push_back(conf.stride()); + dilation_.push_back(conf.dilation()); filterSize_.push_back(conf.filter_size()); paddingY_.push_back(conf.padding_y()); strideY_.push_back(conf.stride_y()); + dilationY_.push_back(conf.dilation_y()); filterSizeY_.push_back(conf.filter_size_y()); filterPixels_.push_back(filterSize_.back() * filterSizeY_.back()); channels_.push_back(conf.channels()); @@ -89,7 +91,11 @@ size_t ConvBaseLayer::calOutputSize() { size_t layerSize = 0; auto setLayerSize = [&](IntV& inH, IntV& inW, IntV& outH, IntV& outW) { + size_t filterSizeY; + size_t filterSize; for (size_t i = 0; i < inputLayers_.size(); i++) { + filterSizeY = (filterSizeY_[i] - 1) * dilationY_[i] + 1; + filterSize = (filterSize_[i] - 1) * dilation_[i] + 1; inH.push_back(inputLayers_[i]->getOutput().getFrameHeight()); inW.push_back(inputLayers_[i]->getOutput().getFrameWidth()); const ConvConfig& conf = config_.inputs(i).conv_conf(); @@ -98,17 +104,17 @@ size_t ConvBaseLayer::calOutputSize() { inH[i] = conf.has_output_y() ? conf.output_y() : conf.output_x(); if (inW[i] == 0) inW[i] = conf.output_x(); outH.push_back(imageSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); - outW.push_back(imageSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); + outW.push_back( + imageSize(inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } else { if (inH[i] == 0) inH[i] = conf.has_img_size_y() ? conf.img_size_y() : conf.img_size(); if (inW[i] == 0) inW[i] = conf.img_size(); outH.push_back(outputSize( - inH[i], filterSizeY_[i], paddingY_[i], strideY_[i], caffeMode_)); + inH[i], filterSizeY, paddingY_[i], strideY_[i], caffeMode_)); outW.push_back(outputSize( - inW[i], filterSize_[i], padding_[i], stride_[i], caffeMode_)); + inW[i], filterSize, padding_[i], stride_[i], caffeMode_)); } CHECK_EQ(outH[i], outH[0]); CHECK_EQ(outW[i], outW[0]); diff --git a/paddle/gserver/layers/ConvBaseLayer.h b/paddle/gserver/layers/ConvBaseLayer.h index e9d15d94f806a5d2e6f11cbbfc29e291dfe8538f..223bce8e296d748c8e17eb105aa67e8a1c1219b6 100644 --- a/paddle/gserver/layers/ConvBaseLayer.h +++ b/paddle/gserver/layers/ConvBaseLayer.h @@ -40,6 +40,10 @@ protected: IntV stride_; /// The y dimension of the stride. IntV strideY_; + /// The x dimension of the dilation. + IntV dilation_; + /// The y dimension of the dilation. + IntV dilationY_; /// The x dimension of a filter kernel. IntV filterSize_; /// The y dimension of a filter kernel. diff --git a/paddle/gserver/layers/ConvBaseOperator.cpp b/paddle/gserver/layers/ConvBaseOperator.cpp index 5c231986292d2cd26ee30ccc122142fccd5b4949..5469c41c87468001232f7bae0d5b6bf26693b9e0 100644 --- a/paddle/gserver/layers/ConvBaseOperator.cpp +++ b/paddle/gserver/layers/ConvBaseOperator.cpp @@ -59,7 +59,8 @@ void ConvBaseOperator::allocConvWorkSpace() { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + /*useDilation*/ false); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.cpp b/paddle/gserver/layers/ConvBaseProjection.cpp index eb6b0445c95a9e9a7acd5d693ecdb11a263f41fd..08f36c516cfdadd42e9333c1c5a7a247df1f263e 100644 --- a/paddle/gserver/layers/ConvBaseProjection.cpp +++ b/paddle/gserver/layers/ConvBaseProjection.cpp @@ -41,6 +41,11 @@ void ConvBaseProjection::getConvParams() { strideH_ = conf.stride_y(); strideW_ = conf.stride(); + dilationH_ = conf.dilation_y(); + dilationW_ = conf.dilation(); + CHECK_GT(dilationH_, 0); + CHECK_GT(dilationW_, 0); + filterH_ = conf.filter_size_y(); filterW_ = conf.filter_size(); @@ -77,7 +82,9 @@ void ConvBaseProjection::initCudnn() { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); // initialize all to default algorithms fwdAlgo_ = 0; @@ -131,7 +138,9 @@ void ConvBaseProjection::reshapeTensorDesc(int batchSize) { paddingH_, paddingW_, strideH_, - strideW_); + strideW_, + dilationH_, + dilationW_); } void ConvBaseProjection::reshape(int batchSize) { @@ -140,6 +149,10 @@ void ConvBaseProjection::reshape(int batchSize) { CHECK_EQ(calInputSize(), in_->value->getWidth()); reshapeTensorDesc(batchSize); + bool useDilation = false; + if (dilationH_ > 1 || dilationW_ > 1) { + useDilation = true; + } hl_conv_workspace(imageDesc_, outputDesc_, filterDesc_, @@ -149,7 +162,8 @@ void ConvBaseProjection::reshape(int batchSize) { &bwdDataAlgo_, &bwdDataLimitBytes_, &bwdFilterAlgo_, - &bwdFilterLimitBytes_); + &bwdFilterLimitBytes_, + useDilation); size_t maxWorkSpace = 0; maxWorkSpace = std::max(fwdLimitBytes_, bwdDataLimitBytes_); diff --git a/paddle/gserver/layers/ConvBaseProjection.h b/paddle/gserver/layers/ConvBaseProjection.h index e9d9f8f1b2937b3a3b7323c43ef5608ffc5f82ca..ebdb57845bb36ac607b1e4c8e02f9d20b6e82a36 100644 --- a/paddle/gserver/layers/ConvBaseProjection.h +++ b/paddle/gserver/layers/ConvBaseProjection.h @@ -63,6 +63,7 @@ protected: int configChannels_, configNumFilters_; int paddingH_, paddingW_; int strideH_, strideW_; + int dilationH_, dilationW_; int filterH_, filterW_; /// One group offset of input data. int inputOffset_; diff --git a/paddle/gserver/layers/ConvProjection.cpp b/paddle/gserver/layers/ConvProjection.cpp index 5b7ecc5560c1e7431305b34a331fe1fbc96c6b06..6f0106b713d93494ba9baa5c7afa0a6b1f167262 100644 --- a/paddle/gserver/layers/ConvProjection.cpp +++ b/paddle/gserver/layers/ConvProjection.cpp @@ -25,12 +25,12 @@ size_t ConvProjection::calOutputSize() { if (imageH_ == 0) imageH_ = configImgH_; if (imageW_ == 0) imageW_ = configImgW_; outputH_ = outputSize(imageH_, - filterH_, + (filterH_ - 1) * dilationH_ + 1, paddingH_, strideH_, /* caffeMode */ true); outputW_ = outputSize(imageW_, - filterW_, + (filterW_ - 1) * dilationW_ + 1, paddingW_, strideW_, /* caffeMode */ true); diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp index dd2c955e6a4660a1811f205ec5c5861798291912..9946f7666498e27a3149816c67ff4c9a9f3bb02a 100644 --- a/paddle/gserver/tests/test_LayerGrad.cpp +++ b/paddle/gserver/tests/test_LayerGrad.cpp @@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#ifndef PADDLE_ONLY_CPU +#include +#endif #include #include #include @@ -189,10 +192,16 @@ TEST(Projection, scaling) { void testProjectionConv(size_t groups, bool isDeconv) { const int NUM_FILTERS = 18; const int FILTER_SIZE = 2; - const int FILTER_SIZE_Y = 4; + const int FILTER_SIZE_Y = 2; const int CHANNELS = 3; const int IMAGE_SIZE = 16; +#if CUDNN_VERSION >= 6000 + const int DILATION = 2; +#else + const int DILATION = 1; +#endif + ProjectionConfig conf; if (isDeconv) { conf.set_type("convt"); @@ -209,6 +218,8 @@ void testProjectionConv(size_t groups, bool isDeconv) { conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(DILATION); + conv->set_dilation_y(DILATION); conv->set_groups(groups); if (isDeconv) { conv->set_filter_channels(NUM_FILTERS / conv->groups()); @@ -217,12 +228,12 @@ void testProjectionConv(size_t groups, bool isDeconv) { } conv->set_img_size(IMAGE_SIZE); int output_x = outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * DILATION + 1, conv->padding(), conv->stride(), /* caffeMode */ true); int output_y = outputSize(conv->img_size(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * DILATION + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true); @@ -424,27 +435,38 @@ void testConvLayer(const string& type, bool trans, bool useGpu) { config.layerConfig.set_partial_sum(1); config.layerConfig.set_shared_biases(true); - config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); + int dilation = 1; + if (type == "cudnn_conv") { +#if CUDNN_VERSION >= 6000 + dilation = 2; +#else + dilation = 1; +#endif + } + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 768, 192}); LayerInputConfig* input = config.layerConfig.add_inputs(); ConvConfig* conv = input->mutable_conv_conf(); conv->set_filter_size(2); - conv->set_filter_size_y(3); + conv->set_filter_size_y(2); conv->set_channels(3); conv->set_padding(0); conv->set_padding_y(1); conv->set_stride(2); conv->set_stride_y(2); + conv->set_dilation(dilation); + conv->set_dilation_y(dilation); conv->set_groups(1); conv->set_filter_channels(conv->channels() / conv->groups()); conv->set_img_size(16); - conv->set_img_size_y(8); + conv->set_img_size_y(16); conv->set_output_x(outputSize(conv->img_size(), - conv->filter_size(), + (conv->filter_size() - 1) * dilation + 1, conv->padding(), conv->stride(), /* caffeMode */ true)); conv->set_output_y(outputSize(conv->img_size_y(), - conv->filter_size_y(), + (conv->filter_size_y() - 1) * dilation + 1, conv->padding_y(), conv->stride_y(), /* caffeMode */ true)); diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index b56a45b6bd1e4d834a3c11da989b4a0707a24bf6..f466dbc79a2059faa1e3d4ad6ede3f2394580842 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -42,6 +42,7 @@ function(op_library TARGET) endfunction() add_subdirectory(math) + cc_test(gather_test SRCS gather_test.cc DEPS tensor) op_library(gather_op SRCS gather_op.cc gather_op.cu) @@ -67,7 +68,7 @@ op_library(sgd_op SRCS sgd_op.cc sgd_op.cu) op_library(recurrent_op SRCS recurrent_op.cc rnn/recurrent_op_utils.cc DEPS framework_proto tensor op_registry operator net_op) -op_library(uniform_random_op - SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(uniform_random_op SRCS uniform_random_op.cc uniform_random_op.cu) +op_library(lookup_table_op SRCS lookup_table_op.cc lookup_table_op.cu) op_library(scale_op SRCS scale_op.cc scale_op.cu DEPS net_op) op_library(minus_op SRCS minus_op.cc minus_op.cu DEPS scale_op) diff --git a/paddle/operators/fill_zeros_like_op.h b/paddle/operators/fill_zeros_like_op.h index fd380ca8514b0ac50f39613368a4836bd485668b..969998ce2eae02b8ad057c6259703e51559bf98a 100644 --- a/paddle/operators/fill_zeros_like_op.h +++ b/paddle/operators/fill_zeros_like_op.h @@ -26,7 +26,7 @@ class FillZerosLikeKernel : public framework::OpKernel { auto* output = context.Output("Dst"); output->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*output); - t.device(context.GetEigenDevice()) = t.constant(T(0)); + t.device(context.GetEigenDevice()) = t.constant(static_cast(0)); } }; diff --git a/paddle/operators/lookup_table_op.cc b/paddle/operators/lookup_table_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..94d40890a765413e88a35a6ad995ca97ac84dcda --- /dev/null +++ b/paddle/operators/lookup_table_op.cc @@ -0,0 +1,72 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/operators/lookup_table_op.h" + +namespace paddle { +namespace operators { + +class LookupTableOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + output_t->Resize({ids_t->dims()[0], table_t->dims()[1]}); + } +}; + +class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker { + public: + LookupTableOpMaker(framework::OpProto *proto, + framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("W", + "An input represents embedding tensors," + " which is a learnable parameter."); + AddInput("Ids", + "An input with type int32 or int64" + "contains the ids to be looked up in W."); + AddOutput("Out", "The lookup results, which have the same type with W."); + AddComment( + "This operator is used to perform lookups on the parameter W," + "then concatenated into a dense tensor."); + } +}; + +class LookupTableOpGrad : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + protected: + void InferShape(const framework::InferShapeContext &context) const override { + auto table = context.Input("W"); + auto d_table = context.Output(framework::GradVarName("W")); + d_table->Resize(table->dims()); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP(lookup_table, ops::LookupTableOp, ops::LookupTableOpMaker, + lookup_table_grad, ops::LookupTableOpGrad); + +REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel); +REGISTER_OP_CPU_KERNEL(lookup_table_grad, ops::LookupTableGradKernel); diff --git a/paddle/operators/lookup_table_op.cu b/paddle/operators/lookup_table_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..27eee3436af8107cef2aa3577ea238be49edf1af --- /dev/null +++ b/paddle/operators/lookup_table_op.cu @@ -0,0 +1,116 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" +#include "paddle/platform/assert.h" +#include "paddle/platform/cuda_helper.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +__global__ void LookupTable(T* output, const T* table, const int32_t* ids, + const int N, const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * GridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + T* out = output + idy * D; + const T* tab = table + id * D; + for (int i = idx; i < D; i += BlockDimX) { + out[i] = tab[i]; + } + idy += BlockDimY * GridDimX; + } +} + +template +__global__ void LookupTableGrad(T* table, const T* output, const int32_t* ids, + const int N, const int K, const int D) { + int idx = threadIdx.x; + int idy = blockIdx.x + threadIdx.y * GridDimX; + + while (idy < K) { + int id = ids[idy]; + PADDLE_ASSERT(id >= 0); + PADDLE_ASSERT(id < N); + const T* out = output + idy * D; + T* tab = table + id * D; + for (int i = idx; i < D; i += BlockDimX) { + paddle::platform::CudaAtomicAdd(&tab[i], out[i]); + } + idy += BlockDimY * GridDimX; + } +} + +template +class LookupTableCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); + auto ids_t = context.Input("Ids"); + auto output_t = context.Output("Out"); + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + size_t K = product(ids_t->dims()); + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTable<<>>(output, table, ids, N, K, D); + } +}; + +template +class LookupTableGradCUDAKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + int N = d_table_t->dims()[0]; + int D = d_table_t->dims()[1]; + int K = product(ids_t->dims()); + const int32_t* ids = ids_t->data(); + const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + + dim3 threads(128, 8); + dim3 grids(8, 1); + LookupTableGrad<<>>(d_table, d_output, ids, N, + K, D); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_GPU_KERNEL(lookup_table, ops::LookupTableCUDAKernel); +REGISTER_OP_GPU_KERNEL(lookup_table_grad, + ops::LookupTableGradCUDAKernel); diff --git a/paddle/operators/lookup_table_op.h b/paddle/operators/lookup_table_op.h new file mode 100644 index 0000000000000000000000000000000000000000..4da8079b91624c3510cae89fd599a7035a4c7477 --- /dev/null +++ b/paddle/operators/lookup_table_op.h @@ -0,0 +1,75 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/eigen.h" +#include "paddle/framework/op_registry.h" + +namespace paddle { +namespace operators { + +using Tensor = framework::Tensor; + +template +class LookupTableKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto table_t = context.Input("W"); // float tensor + auto ids_t = context.Input("Ids"); // int tensor + auto output_t = context.Output("Out"); // float tensor + + size_t N = table_t->dims()[0]; + size_t D = table_t->dims()[1]; + auto ids = ids_t->data(); + auto table = table_t->data(); + auto output = output_t->mutable_data(context.GetPlace()); + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + memcpy(output + i * D, table + ids[i] * D, D * sizeof(T)); + } + } +}; + +template +class LookupTableGradKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto ids_t = context.Input("Ids"); + auto d_output_t = context.Input(framework::GradVarName("Out")); + auto d_table_t = context.Output(framework::GradVarName("W")); + + size_t N = d_table_t->dims()[0]; + size_t D = d_table_t->dims()[1]; + auto ids = ids_t->data(); + const T* d_output = d_output_t->data(); + T* d_table = d_table_t->mutable_data(context.GetPlace()); + + auto t = framework::EigenVector::Flatten(*d_table_t); + t.device(context.GetEigenDevice()) = + t.constant(static_cast(0)); + + for (size_t i = 0; i < product(ids_t->dims()); ++i) { + PADDLE_ENFORCE_LT(ids[i], N); + PADDLE_ENFORCE_GE(ids[i], 0); + for (size_t j = 0; j < D; ++j) { + d_table[ids[i] * D + j] += d_output[i * D + j]; + } + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/rowwise_add_op.cu b/paddle/operators/rowwise_add_op.cu index cbc61ad3e117fc79a674ca21831d3fec59d1ec5b..4a57f64c890ce99d6060faec6a4a01b107403344 100644 --- a/paddle/operators/rowwise_add_op.cu +++ b/paddle/operators/rowwise_add_op.cu @@ -18,3 +18,6 @@ namespace ops = paddle::operators; REGISTER_OP_GPU_KERNEL( rowwise_add, ops::RowwiseAddKernel); +REGISTER_OP_GPU_KERNEL( + rowwise_add_grad, + ops::RowwiseAddGradKernel); diff --git a/paddle/platform/cuda_helper.h b/paddle/platform/cuda_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..6feec0d7f8bd5d32d9e5eedee962fcbeff655f1c --- /dev/null +++ b/paddle/platform/cuda_helper.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include + +namespace paddle { +namespace platform { + +#define CUDA_ATOMIC_WRAPPER(op, T) \ + __device__ __forceinline__ T CudaAtomic##op(T* address, const T val) + +#define USE_CUDA_ATOMIC(op, T) \ + CUDA_ATOMIC_WRAPPER(op, T) { return atomic##op(address, val); } + +// For atomicAdd. +USE_CUDA_ATOMIC(Add, float); + +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 600 +USE_CUDA_ATOMIC(Add, double); +#else +CUDA_ATOMIC_WRAPPER(Add, double) { + unsigned long long int* address_as_ull = + reinterpret_cast(address); + unsigned long long int old = *address_as_ull, assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __double_as_longlong(val + __longlong_as_double(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN + } while (assumed != old); + + return __longlong_as_double(old); +} +#endif + +} // namespace platform +} // namespace paddle diff --git a/paddle/pybind/CMakeLists.txt b/paddle/pybind/CMakeLists.txt index 40db811767a9c273f073f4715e6ddfbf05887730..abb9c248eee9c59e8e6b9fa9d1878fec5dd67569 100644 --- a/paddle/pybind/CMakeLists.txt +++ b/paddle/pybind/CMakeLists.txt @@ -15,6 +15,7 @@ cc_library(paddle_pybind SHARED uniform_random_op gaussian_random_op fill_zeros_like_op + lookup_table_op scale_op minus_op) endif(WITH_PYTHON) diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc index 27b98e77db80505f7498deb75164e184b900262b..8fa8be2cef5fff04ed61ac726e5d8111e30c8a09 100644 --- a/paddle/pybind/pybind.cc +++ b/paddle/pybind/pybind.cc @@ -42,6 +42,7 @@ USE_OP(fill_zeros_like); USE_OP_ITSELF(recurrent_op); USE_OP(gaussian_random); USE_OP(uniform_random); +USE_OP(lookup_table); USE_OP(scale); USE_OP_ITSELF(identity); USE_OP(minus); diff --git a/proto/ModelConfig.proto b/proto/ModelConfig.proto index 4f3d5bf3f6cb96c97285f40e3a3d100c2af47ad5..1ea1e052596524f5baa0a55f601c4fa928acd8af 100644 --- a/proto/ModelConfig.proto +++ b/proto/ModelConfig.proto @@ -82,6 +82,9 @@ message ConvConfig { // if not set, use img_size optional uint32 img_size_y = 14; + + optional uint32 dilation = 15 [ default = 1 ]; + optional uint32 dilation_y = 16 [ default = 1 ]; } message PoolConfig { diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index 56afaa59bb08b33b7c2bbbff86e77cd29af260ae..efc76764662b3832dbacc6c8a3c2bca4ccbe4cd8 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -870,12 +870,16 @@ class Conv(Cfg): caffe_mode=True, filter_size_y=None, padding_y=None, - stride_y=None): + stride_y=None, + dilation=None, + dilation_y=None): self.add_keys(locals()) if filter_size_y is None: self.filter_size_y = filter_size if padding_y is None: self.padding_y = padding + if dilation_y is None: + self.dilation_y = dilation if stride_y is None: self.stride_y = stride if output_x is not None: diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 252b66feba8c7933fe8d789bbbc365cd51c51476..f323b017c06bb3d0f2c69de0faccb04258ccd2ad 100755 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -2342,6 +2342,7 @@ def img_conv_layer(input, groups=1, stride=1, padding=0, + dilation=1, bias_attr=None, param_attr=None, shared_biases=True, @@ -2349,6 +2350,7 @@ def img_conv_layer(input, filter_size_y=None, stride_y=None, padding_y=None, + dilation_y=None, trans=False, layer_type=None): """ @@ -2413,6 +2415,11 @@ def img_conv_layer(input, :type padding: int|tuple|list :param padding_y: The y dimension of the padding. :type padding_y: int + :param dilation: The x dimension of the dilation. Or input a tuple for two + image dimension + :type dilation: int|tuple|list + :param dilation_y: The y dimension of the dilation. + :type dilation_y: int :param bias_attr: Convolution bias attribute. None means default bias. False means no bias. :type bias_attr: ParameterAttribute|False @@ -2460,6 +2467,13 @@ def img_conv_layer(input, else: padding_y = padding + if dilation_y is None: + if isinstance(dilation, collections.Sequence): + assert len(dilation) == 2 + dilation, dilation_y = dilation + else: + dilation_y = dilation + if param_attr.attr.get('initial_smart'): # special initial for conv layers. init_w = (2.0 / (filter_size**2 * num_channels))**0.5 @@ -2469,6 +2483,8 @@ def img_conv_layer(input, param_attr.attr["initial_smart"] = False if layer_type: + if dilation > 1 or dilation_y > 1: + assert layer_type in ["cudnn_conv", "cudnn_convt"] if trans: assert layer_type in ["exconvt", "cudnn_convt"] else: @@ -2484,11 +2500,13 @@ def img_conv_layer(input, conv=Conv( filter_size=filter_size, padding=padding, + dilation=dilation, stride=stride, channels=num_channels, groups=groups, filter_size_y=filter_size_y, padding_y=padding_y, + dilation_y=dilation_y, stride_y=stride_y), **param_attr.attr), active_type=act.name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py index 9fda16a5407a1fe0af8c5986023a8368e5b87222..01d31ef3fad827bfd103ee00f4ddd1bde14e0f82 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/img_layers.py +++ b/python/paddle/trainer_config_helpers/tests/configs/img_layers.py @@ -12,6 +12,7 @@ img_conv = img_conv_layer( num_filters=64, filter_size=(32, 32), padding=(1, 1), + dilation=(1, 1), stride=(1, 1), act=LinearActivation()) img_bn = batch_norm_layer(input=img_conv, act=ReluActivation()) diff --git a/python/paddle/v2/framework/tests/CMakeLists.txt b/python/paddle/v2/framework/tests/CMakeLists.txt index 3f4110e4a9de796140af9703559937338d27f251..fb4686889a644753afdeb748b444e757ed016eda 100644 --- a/python/paddle/v2/framework/tests/CMakeLists.txt +++ b/python/paddle/v2/framework/tests/CMakeLists.txt @@ -28,4 +28,6 @@ py_test(test_uniform_random_op SRCS test_uniform_random_op.py) py_test(test_recurrent_op SRCS test_recurrent_op.py) py_test(test_sgd_op SRCS test_sgd_op.py) py_test(test_gradient_checker SRCS test_gradient_checker.py) +py_test(test_lookup_table SRCS test_lookup_table.py) py_test(test_scale_and_identity_op SRCS test_scale_and_identity_op.py) +py_test(mnist SRCS mnist.py) diff --git a/python/paddle/v2/framework/tests/gradient_checker.py b/python/paddle/v2/framework/tests/gradient_checker.py index d7809e52fbd720c74bc6598cb03623800a4fbdf7..8eb9f3f07321d388e0035825b71534b7d9076cf6 100644 --- a/python/paddle/v2/framework/tests/gradient_checker.py +++ b/python/paddle/v2/framework/tests/gradient_checker.py @@ -23,6 +23,10 @@ def grad_var_name(var_name): return var_name + "@GRAD" +def empty_var_name(): + return "@EMPTY@" + + def get_numeric_gradient(op, input_values, output_name, @@ -182,7 +186,7 @@ class GradientChecker(unittest.TestCase): ] return outs - def compare_grad(self, forward_op, input_value): + def compare_grad(self, forward_op, input_value, no_grad_set=None): """ Compare the input gradients between CPU and GPU for the given forward operator. @@ -190,15 +194,20 @@ class GradientChecker(unittest.TestCase): :type forward_op: Operator :param input_value: input values. :type input_value: dict{string:numpy.array} + :param no_grad_set: the set of variables names without gradients. + :type no_grad_set: a set of string :raises: AssertionError, there is different gradient value. """ - backward_op = core.Operator.backward(forward_op, set()) + if no_grad_set is None: + no_grad_set = set() + backward_op = core.Operator.backward(forward_op, no_grad_set) # return if not compile with GPU or not implementing GPU kernel if not (core.is_compile_gpu() and backward_op.support_gpu()): return outputs = backward_op.outputs() out_names = [item for k in outputs for item in outputs[k]] + out_names = filter(lambda x: x != empty_var_name(), out_names) cpu_grads = self.__get_gradient(forward_op, backward_op, input_value, out_names, core.CPUPlace()) gpu_grads = self.__get_gradient(forward_op, backward_op, input_value, diff --git a/python/paddle/v2/framework/tests/mnist.py b/python/paddle/v2/framework/tests/mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..9a0b109850e92c66e69f74c5cd0853a09b5551a1 --- /dev/null +++ b/python/paddle/v2/framework/tests/mnist.py @@ -0,0 +1,249 @@ +import paddle.v2.framework.core as core +from paddle.v2.framework.op import Operator +import numpy +import paddle.v2 as paddle + +BATCH_SIZE = 100 + +scope = core.Scope() +place = core.CPUPlace() +# if you want to test GPU training, you can use gpu place +# place = core.GPUPlace(0) +dev_ctx = core.DeviceContext.create(place) + +init_net = core.Net.create() +forward_net = core.Net.create() +backward_net = None +optimize_net = core.Net.create() + + +def atomic_id(): + id = 0 + while True: + yield id + id += 1 + + +uniq_id = atomic_id().next + + +def data_layer(name, dims): + var = scope.new_var(name) + tensor = var.get_tensor() + tensor.set_dims(dims) # 1 is batch size holder. + return name + + +def feed_data(name, data): + assert isinstance(data, numpy.ndarray) + tensor = scope.find_var(name).get_tensor() + tensor.set_dims(data.shape) + if data.dtype == numpy.dtype('int32'): + tensor.alloc_int(place) + elif data.dtype == numpy.dtype('float32'): + tensor.alloc_float(place) + else: + raise ValueError("data type not supported") + tensor.set(data, place) + + +def grad_var_name(var_name): + return var_name + "@GRAD" + + +def sgd_optimizer(net, param_name, learning_rate=0.005): + grad_name = grad_var_name(param_name) + optimize_op = Operator( + "sgd", + param=param_name, + grad=grad_name, + param_out=param_name, + learning_rate=learning_rate) + net.append_op(optimize_op) + + +# should use operator and add these to the init_network +def init_param(net, param_name, dims): + scope.new_var(param_name) + op = Operator( + "uniform_random", Out=param_name, dims=dims, min=-0.5, max=0.5, seed=10) + op.infer_shape(scope) + net.append_op(op) + + +# fc_layer +def fc_layer(net, input, size, act="softmax", bias=True, param=None, name=None): + """ + Add a fc layer to net + + :param input: input variable name. + :type input: str + :param size: fully connected layer size. + :param act: activation name + :param param: parameter attribute, used for initialize parameters. + :param bias: bias attribute. False will not have a bias. + :param name: the name of fc layer. If not set, model will generate a + readable name + :return: output variable name. + """ + if name is None: + name = 'fc_%d' % uniq_id() + if not isinstance(name, str): + raise ValueError("name should be string") + + input_dims = scope.find_var(input).get_tensor().get_dims() + + w_name = param or name + ".w" + init_param(net=init_net, param_name=w_name, dims=[input_dims[1], size]) + sgd_optimizer(net=optimize_net, param_name=w_name, learning_rate=0.01) + + pre_activation = name + ".mul.out" + scope.new_var(pre_activation) + mul_op = Operator("mul", X=input, Y=w_name, Out=pre_activation) + net.append_op(mul_op) + + # create bias variable if needed + if bias: + bias_name = name + ".b" + init_param(net=init_net, param_name=bias_name, dims=[size]) + sgd_optimizer( + net=optimize_net, param_name=bias_name, learning_rate=0.001) + bias_out = name + ".rowwise_add.out" + scope.new_var(bias_out) + rowwise_append_op = Operator( + "rowwise_add", X=pre_activation, b=bias_name, Out=bias_out) + net.append_op(rowwise_append_op) + pre_activation = bias_out + + activation_op = Operator(act, X=pre_activation, Y=name) + net.append_op(activation_op) + scope.new_var(name) + net.infer_shape(scope) + return name + + +def cross_entropy_layer(net, input, label): + cost_name = 'cross_entropy_%d' % uniq_id() + cross_entropy_op = Operator( + "onehot_cross_entropy", X=input, label=label, Y=cost_name) + net.append_op(cross_entropy_op) + scope.new_var(cost_name) + net.infer_shape(scope) + return cost_name + + +def create_backward_net(forward_net): + net = core.Operator.backward(forward_net, set()) + for input in net.inputs()["all"]: + var = scope.new_var(input) + var.get_tensor() + for output in net.outputs()["all"]: + var = scope.new_var(output) + var.get_tensor() + return net + + +def debug_print_op(op): + print("===============" + op.type() + "==============") + print("***inputs:***") + for input in op.inputs()["all"]: + print input, scope.find_var(input).get_tensor().get_dims() + print("\n***outputs:***") + for output in op.outputs()["all"]: + print output, scope.find_var(output).get_tensor().get_dims() + print("") + print("") + + +def set_cost(cost): + cost_shape = numpy.array(scope.find_var(cost).get_tensor()).shape + cost_grad = \ + scope.find_var(grad_var_name(cost)).get_tensor() + cost_grad.set_dims(cost_shape) + cost_grad.alloc_float(place) + cost_grad.set(numpy.ones(cost_shape).astype("float32"), place) + + +def get_cost_mean(cost): + cost_data = numpy.array(scope.find_var(cost).get_tensor()) + return cost_data.sum() / len(cost_data) + + +def error_rate(predict, label): + predict_var = numpy.array(scope.find_var(predict).get_tensor()).argmax( + axis=1) + label = numpy.array(scope.find_var(label).get_tensor()) + error_num = numpy.sum(predict_var != label) + return error_num / float(len(label)) + + +images = data_layer(name='pixel', dims=[BATCH_SIZE, 784]) +labels = data_layer(name='label', dims=[BATCH_SIZE]) +fc1 = fc_layer(net=forward_net, input=images, size=100, act="sigmoid") +fc2 = fc_layer(net=forward_net, input=fc1, size=100, act="sigmoid") +predict = fc_layer(net=forward_net, input=fc2, size=100, act="softmax") +cost = cross_entropy_layer(net=forward_net, input=predict, label=labels) + +init_net.complete_add_op(True) +forward_net.complete_add_op(True) +backward_net = create_backward_net(forward_net) +optimize_net.complete_add_op(True) + +print(init_net) +print(forward_net) +print(backward_net) +print(optimize_net) + +debug_print_op(forward_net) +debug_print_op(backward_net) +debug_print_op(optimize_net) + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=8192), + batch_size=BATCH_SIZE) + + +def test(cost_name): + test_reader = paddle.batch( + paddle.dataset.mnist.test(), batch_size=BATCH_SIZE) + cost = [] + error = [] + for data in test_reader(): + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) + + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) + cost.append(get_cost_mean(cost_name)) + error.append(error_rate(predict, "label")) + print("cost=" + str(sum(cost) / float(len(cost))) + " error_rate=" + str( + sum(error) / float(len(error)))) + + +PASS_NUM = 1 + +init_net.run(scope, dev_ctx) +for pass_id in range(PASS_NUM): + batch_id = 0 + + for data in train_reader(): + image_data = numpy.array(map(lambda x: x[0], data)).astype("float32") + label_data = numpy.array(map(lambda x: x[1], data)).astype("int32") + feed_data(images, image_data) + feed_data(labels, label_data) + + forward_net.infer_shape(scope) + forward_net.run(scope, dev_ctx) + set_cost(cost) + backward_net.infer_shape(scope) + backward_net.run(scope, dev_ctx) + + optimize_net.run(scope, dev_ctx) + if batch_id % 100 == 0: + print("pass[" + str(pass_id) + "] batch_id[" + str(batch_id) + "]") + test(cost) + + batch_id = batch_id + 1 diff --git a/python/paddle/v2/framework/tests/test_lookup_table.py b/python/paddle/v2/framework/tests/test_lookup_table.py new file mode 100644 index 0000000000000000000000000000000000000000..19eb464baa555fb67a994f3cfb4d3ed628367c73 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_lookup_table.py @@ -0,0 +1,31 @@ +import unittest +import numpy as np +from op_test_util import OpTestMeta +from gradient_checker import GradientChecker, create_op + + +class TestSigmoidOp(unittest.TestCase): + __metaclass__ = OpTestMeta + + def setUp(self): + self.type = 'lookup_table' + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4).astype('int32') + self.inputs = {'W': table, 'Ids': ids} + self.outputs = {'Out': table[ids]} + + +class TestSigmoidGradOp(GradientChecker): + def test_grad(self): + op = create_op('lookup_table') + table = np.random.random((17, 31)).astype('float32') + ids = np.random.randint(0, 17, 4).astype('int32') + inputs = {'W': table, 'Ids': ids} + # comapre gradients + self.compare_grad(op, inputs, set(['Ids'])) + # check gradients + self.check_grad(op, inputs, set('W'), 'Out') + + +if __name__ == '__main__': + unittest.main()