From ffafc5c911c38ff1245d21c73b1bb7936df490f7 Mon Sep 17 00:00:00 2001
From: caoying03
Date: Mon, 7 Aug 2017 08:54:18 +0800
Subject: [PATCH] fix the SubNestedSequenceLayer implementation.

---
 .../gserver/layers/SubNestedSequenceLayer.cpp |   88 +-
 paddle/gserver/tests/test_LayerGrad.cpp       | 3820 ++++++++---------
 .../paddle/trainer_config_helpers/layers.py   |    6 +-
 3 files changed, 1982 insertions(+), 1932 deletions(-)

diff --git a/paddle/gserver/layers/SubNestedSequenceLayer.cpp b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
index 443396a14d..f875fdea45 100644
--- a/paddle/gserver/layers/SubNestedSequenceLayer.cpp
+++ b/paddle/gserver/layers/SubNestedSequenceLayer.cpp
@@ -31,16 +31,22 @@ public:
   void backward(const UpdateCallback& callback = nullptr) override;
 
 private:
-  void calSelectedCols(const MatrixPtr scores,
-                       const int* seqStartPos,
-                       const int* subSeqStartPos);
+  void reorganizeSeqInfo(const ICpuGpuVectorPtr seqStartPos,
+                         const ICpuGpuVectorPtr subSeqStartPos);
+  void calSelectedCols(const MatrixPtr selectedIndices,
+                       const std::vector<std::vector<int>> inputSeqInfo);
   void buildOutputSeqInfo();
 
   std::vector<int> outSeqStartInfo_;
   std::vector<int> outSubSeqStartInfo_;
-  MatrixPtr scoreOverInputSeq_;
+  // if the second input of this layer is in GPU memory, copy it to CPU memory.
+  MatrixPtr selIdsCpu_;
+  // reorganize sequenceStartPositions and subSequenceStartPositions together
+  // into a 2D vector to facilitate the sequence selection process.
+  std::vector<std::vector<int>> inputSeqInfo_;
+  // the final selected row indices in a batch,
   // rowIndice_ and selectedRows_ actually share the same memory.
   IVectorPtr rowIndice_;
   std::vector<int> selectedRows_;
@@ -57,12 +63,47 @@ bool SubNestedSequenceLayer::init(const LayerMap& layerMap,
   return true;
 }
 
-void SubNestedSequenceLayer::calSelectedCols(const MatrixPtr selected_indices,
-                                             const int* seqStartPos,
-                                             const int* subSeqStartPos) {
+void SubNestedSequenceLayer::reorganizeSeqInfo(
+    const ICpuGpuVectorPtr seqStartPos, const ICpuGpuVectorPtr subSeqStartPos) {
+  int* seqStarts = seqStartPos->getMutableData(false);
+  int* subSeqStarts = subSeqStartPos->getMutableData(false);
+
+  int seqNum = seqStartPos->getSize() - 1;
+  inputSeqInfo_.resize(seqNum, std::vector<int>());
+  int seqIdx = 0;
+  for (size_t i = 0; i < subSeqStartPos->getSize(); ++i) {
+    inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
+    if (subSeqStarts[i] == seqStarts[seqIdx + 1]) {
+      seqIdx++;
+      if (seqIdx == seqNum) return;
+      inputSeqInfo_[seqIdx].push_back(subSeqStarts[i]);
+    }
+  }
+}
+
+void SubNestedSequenceLayer::calSelectedCols(
+    const MatrixPtr selectedIndices,
+    const std::vector<std::vector<int>> inputSeqInfo) {
   selectedRows_.clear();
   outSubSeqStartInfo_.resize(1, 0);
   outSeqStartInfo_.resize(1, 0);
+
+  size_t seqNum = selectedIndices->getHeight();
+  size_t beamSize = selectedIndices->getWidth();
+  for (size_t i = 0; i < seqNum; ++i) {
+    for (size_t j = 0; j < beamSize; ++j) {
+      if (selectedIndices->getElement(i, j) == -1.) 
break;
+      int selSubSeqIdx = selectedIndices->getElement(i, j);
+      CHECK_GT(inputSeqInfo_[i].size() - 1, selSubSeqIdx);
+
+      size_t subSeqLen =
+          inputSeqInfo_[i][selSubSeqIdx + 1] - inputSeqInfo_[i][selSubSeqIdx];
+      for (size_t k = 0; k < subSeqLen; ++k)
+        selectedRows_.push_back(inputSeqInfo_[i][selSubSeqIdx] + k);
+      outSubSeqStartInfo_.push_back(outSubSeqStartInfo_.back() + subSeqLen);
+    }
+    outSeqStartInfo_.push_back(outSubSeqStartInfo_.back());
+  }
 }
 
 void SubNestedSequenceLayer::buildOutputSeqInfo() {
@@ -83,17 +124,35 @@ void SubNestedSequenceLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   const Argument& inputSeq = getInput(0);
-  const MatrixPtr selected_indices = getInputValue(1);
   CHECK(inputSeq.hasSubseq()) << "The first input of SubNestSequence layer "
                               << "must be a nested sequence.";
-  CHECK_EQ(inputSeq.getNumSequences(), selected_indices->getHeight());
-
-  calSelectedCols(selected_indices,
-                  inputSeq.sequenceStartPositions->getMutableData(false),
-                  inputSeq.subSequenceStartPositions->getMutableData(false));
+  const MatrixPtr selectedIndices = getInputValue(1);
+  CHECK_EQ(inputSeq.getNumSequences(), selectedIndices->getHeight());
+
+  if (dynamic_cast<GpuMatrix*>(selectedIndices.get())) {
+    /*
+     * Currently, the second input of this layer is either generated by
+     * kmax_sequence_score_layer, whose output is always stored on CPU,
+     * or by a data_layer, which can be on GPU.
+     *
+     * If the second input is on GPU, copy it to CPU memory, because this
+     * input is always small, and the operations on it are all control
+     * logic, not computation.
+     */
+    Matrix::resizeOrCreate(selIdsCpu_,
+                           selectedIndices->getHeight(),
+                           selectedIndices->getWidth(),
+                           false /* trans */,
+                           false /* useGpu */);
+    selIdsCpu_->copyFrom(*selectedIndices);
+  } else {
+    selIdsCpu_ = selectedIndices;
+  }
+  reorganizeSeqInfo(inputSeq.sequenceStartPositions,
+                    inputSeq.subSequenceStartPositions);
+  calSelectedCols(selIdsCpu_, inputSeqInfo_);
 
   resetOutput(selectedRows_.size(), getSize());
-  buildOutputSeqInfo();
 
   if (useGpu_) {
     rowIndice_ = IVector::create(selectedRows_.size(), useGpu_);
@@ -103,6 +162,7 @@ void SubNestedSequenceLayer::forward(PassType passType) {
         IVector::create(selectedRows_.data(), selectedRows_.size(), useGpu_);
   }
 
+  buildOutputSeqInfo();
   getOutputValue()->selectRows(*getInputValue(0), *rowIndice_);
 }
 
diff --git a/paddle/gserver/tests/test_LayerGrad.cpp b/paddle/gserver/tests/test_LayerGrad.cpp
index da546b979e..0f312b6ca5 100644
--- a/paddle/gserver/tests/test_LayerGrad.cpp
+++ b/paddle/gserver/tests/test_LayerGrad.cpp
@@ -32,1887 +32,1872 @@ DECLARE_double(checkgrad_eps);
 DECLARE_bool(thread_local_rand_use_global_seed);
 DECLARE_bool(prev_batch_state);
 
-// TEST(Operator, dot_mul) {
-//   TestConfig config;
-//   config.layerConfig.set_size(10);
-//
-//   config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
-//   config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0});
-//   config.layerConfig.add_inputs();
-//   config.layerConfig.add_inputs();
-//
-//   OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs();
-//   operatorConf.set_type("dot_mul");
-//   operatorConf.set_dotmul_scale(-1);
-//
-//   testOperatorGrad(config, operatorConf, 100, false, false);
-// }
-//
-// TEST(Projection, context) {
-//   for (auto contextStart : {-5, -3, -1, 0, 3}) {
-//     for (auto contextLength : {1, 2, 5, 7}) {
-//       for (auto batchSize : {1, 2, 5, 20, 50}) {
-//         for (auto trainablePadding : {false, true}) {
-//           LOG(INFO) << " contextStart=" << contextStart
-//                     << " contextLength=" << contextLength
-//                     << " 
batchSize=" << batchSize -// << " trainablePadding=" << trainablePadding; -// ProjectionConfig conf; -// conf.set_type("context"); -// conf.set_input_size(10); -// conf.set_context_start(contextStart); -// conf.set_context_length(contextLength); -// conf.set_trainable_padding(trainablePadding); -// conf.set_output_size(conf.context_length() * conf.input_size()); -// int pad = -// std::max(0, -conf.context_start()) + -// std::max(0, conf.context_start() + conf.context_length() - 1); -// for (auto useGpu : {false, true}) { -// testProjectionGrad( -// conf, -// INPUT_SEQUENCE_DATA, -// trainablePadding ? conf.input_size() * pad : 0, -// batchSize, -// useGpu, -// contextStart + contextLength <= 1); // = testState -// } -// } -// } -// } -// } -// } -// -// TEST(Projection, trans_fc) { -// ProjectionConfig conf; -// conf.set_type("trans_fc"); -// conf.set_input_size(50); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 1000, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, fc) { -// ProjectionConfig conf; -// conf.set_type("fc"); -// conf.set_input_size(10); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 200, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, dot_mul) { -// ProjectionConfig conf; -// conf.set_type("dot_mul"); -// conf.set_input_size(20); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 20, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, table) { -// ProjectionConfig conf; -// conf.set_type("table"); -// conf.set_input_size(10); -// conf.set_output_size(20); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_LABEL, -// /* parameterSize */ 200, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, identity) { -// ProjectionConfig conf; -// conf.set_type("identity"); -// conf.set_input_size(10); -// conf.set_output_size(10); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 0, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// TEST(Projection, slice) { -// ProjectionConfig conf; -// conf.set_type("slice"); -// conf.set_input_size(100); -// SliceConfig& slice1 = *conf.add_slices(); -// slice1.set_start(10); -// slice1.set_end(20); -// SliceConfig& slice2 = *conf.add_slices(); -// slice2.set_start(50); -// slice2.set_end(70); -// conf.set_output_size(30); -// for (auto useGpu : {false, true}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 0, -// /* batchSize */ 10, -// useGpu); -// } -// } -// -// TEST(Projection, scaling) { -// ProjectionConfig conf; -// conf.set_type("scaling"); -// conf.set_input_size(10); -// conf.set_output_size(10); -// for (auto useGpu : {false}) { -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ 1, -// /* batchSize */ 100, -// useGpu); -// } -// } -// -// void testProjectionConv(size_t groups, bool isDeconv) { -// const int NUM_FILTERS = 18; -// const int FILTER_SIZE = 2; -// const int FILTER_SIZE_Y = 4; -// const int CHANNELS = 3; -// const int IMAGE_SIZE = 16; -// -// ProjectionConfig conf; -// if (isDeconv) { -// conf.set_type("convt"); -// } else { -// conf.set_type("conv"); -// } -// conf.set_num_filters(NUM_FILTERS); -// -// ConvConfig* conv = 
conf.mutable_conv_conf(); -// conv->set_filter_size(FILTER_SIZE); -// conv->set_filter_size_y(FILTER_SIZE_Y); -// conv->set_channels(CHANNELS); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(groups); -// if (isDeconv) { -// conv->set_filter_channels(NUM_FILTERS / conv->groups()); -// } else { -// conv->set_filter_channels(conv->channels() / conv->groups()); -// } -// conv->set_img_size(IMAGE_SIZE); -// int output_x = outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true); -// int output_y = outputSize(conv->img_size(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true); -// conv->set_output_x(output_x); -// conv->set_output_y(output_y); -// if (isDeconv) { -// conf.set_input_size(output_x * output_y * CHANNELS); -// conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); -// } else { -// conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); -// conf.set_output_size(output_x * output_y * NUM_FILTERS); -// } -// -// testProjectionGrad(conf, -// INPUT_DATA, -// /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE -// * -// FILTER_SIZE_Y / groups, -// /* batchSize */ 100, -// true, -// false, -// NUM_FILTERS, -// true); -// } -// -// #ifndef PADDLE_ONLY_CPU -// TEST(Projection, conv) { -// /// test ConvProjection -// testProjectionConv(1, false); -// testProjectionConv(3, false); -// /// test ConvTransProjection -// testProjectionConv(1, true); -// testProjectionConv(3, true); -// } -// #endif -// -// TEST(Layer, BilinearInterpLayer) { -// TestConfig config; -// config.layerConfig.set_type("bilinear_interp"); -// config.biasSize = 0; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); -// -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); -// ImageConfig* image = bilinear->mutable_image_conf(); -// image->set_img_size(32); -// image->set_img_size_y(32); -// image->set_channels(4); -// -// for (auto useGpu : {false, true}) { -// for (auto outSize : {32, 64}) { -// bilinear->set_out_size_x(outSize); -// bilinear->set_out_size_y(outSize); -// testLayerGrad(config, "bilinear_interp", 10, false, useGpu); -// } -// } -// } -// -// TEST(Layer, concat) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("concat"); -// config.layerConfig.set_size(15); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "concat", 100, false, useGpu); -// } -// } -// -// TEST(Layer, AddtoLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("addto"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "addto", 100, false, useGpu); -// } -// } -// -// TEST(Layer, CTCLayer) { -// TestConfig config; -// config.layerConfig.set_type("ctc"); -// 
config.layerConfig.set_norm_by_times(false); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "ctc", -// 100, -// /* trans */ false, /* useGpu */ -// useGpu); -// } -// } -// -// TEST(Layer, cosSimLayer) { -// TestConfig config; -// config.layerConfig.set_type("cos"); -// config.layerConfig.set_size(1); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cos", 100, false, useGpu); -// } -// } -// -// TEST(Layer, CosSimVecMatLayer) { -// TestConfig config; -// config.layerConfig.set_type("cos_vm"); -// config.layerConfig.set_size(5); // output size -// config.layerConfig.set_cos_scale(2.0); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cos_vm", 100, false, useGpu); -// } -// } -// -// void testDepthwiseConvLayer(const string& type, bool useGpu) { -// TestConfig config; -// config.biasSize = 32; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(32); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(3); -// conv->set_channels(16); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(16); -// conv->set_filter_channels(conv->channels() / conv->groups()); -// conv->set_img_size(16); -// conv->set_img_size_y(8); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "depthwise_conv", 100, false, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); -// } -// -// TEST(Layer, depthwiseConvLayer) { -// // 'depthwise_conv' is a sepecial case of 'exconv' whose -// // groups size equals to the input channels size. 
-// testDepthwiseConvLayer("exconv", /* useGpu= */ false); -// #ifndef PADDLE_ONLY_CPU -// testDepthwiseConvLayer("exconv", /* useGpu= */ true); -// #endif -// } -// -// void testConvLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// config.biasSize = 16; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(16); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(3); -// conv->set_channels(3); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_filter_channels(conv->channels() / conv->groups()); -// conv->set_img_size(16); -// conv->set_img_size_y(8); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "conv", 100, trans, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); -// } -// -// TEST(Layer, convLayer) { -// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); -// #ifndef PADDLE_ONLY_CPU -// testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); -// testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// void testConvTransLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// config.biasSize = 3; -// config.layerConfig.set_type(type); -// config.layerConfig.set_num_filters(3); -// config.layerConfig.set_partial_sum(1); -// config.layerConfig.set_shared_biases(true); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ConvConfig* conv = input->mutable_conv_conf(); -// conv->set_filter_size(2); -// conv->set_filter_size_y(4); -// conv->set_channels(16); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_filter_channels(3 / conv->groups()); -// conv->set_img_size(16); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// -// config.layerConfig.set_size(conv->img_size() * conv->img_size() * -// config.layerConfig.num_filters()); -// -// testLayerGrad(config, "convTrans", 100, trans, useGpu); -// // Use small batch_size and useWeight=true to test biasGrad -// testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); -// } -// -// TEST(Layer, convTransLayer) { -// for (auto useGpu : {false, true}) { -// testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); -// } -// #ifndef PADDLE_ONLY_CPU -// testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// TEST(Layer, blockExpandLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("blockexpand"); -// 
-// config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); -// blockExpand->set_img_size_x(64); -// blockExpand->set_img_size_y(32); -// blockExpand->set_channels(3); -// blockExpand->set_padding_x(0); -// blockExpand->set_padding_y(0); -// blockExpand->set_block_x(4); -// blockExpand->set_block_y(32); -// blockExpand->set_stride_x(2); -// blockExpand->set_stride_y(2); -// blockExpand->set_output_x(outputSize(blockExpand->img_size_x(), -// blockExpand->block_x(), -// blockExpand->padding_x(), -// blockExpand->stride_x(), -// /* caffeMode */ false)); -// blockExpand->set_output_y(outputSize(blockExpand->img_size_y(), -// blockExpand->block_y(), -// blockExpand->padding_y(), -// blockExpand->stride_y(), -// /* caffeMode */ false)); -// config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() -// * -// blockExpand->channels()); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "blockexpand", 100, false, useGpu); -// } -// } -// -// TEST(Layer, maxoutLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("maxout"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// MaxOutConfig* maxout = input->mutable_maxout_conf(); -// ImageConfig* image = maxout->mutable_image_conf(); -// -// image->set_img_size(32); -// image->set_img_size_y(32); -// image->set_channels(4); -// maxout->set_groups(2); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "maxout", 10, false, useGpu); -// } -// } -// void testFcLayer(string format, size_t nnz) { -// TestConfig config; -// config.biasSize = 4096; -// config.layerConfig.set_type("fc"); -// config.layerConfig.set_size(4096); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_drop_rate(0.1); -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)}); -// config.layerConfig.add_inputs(); -// -// LOG(INFO) << config.inputDefs[0].sparse.sparse << " " -// << config.inputDefs[0].sparse.format; -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "fc", -// 100, -// /* trans */ false, -// useGpu, -// /* weight */ true); -// } -// } -// -// TEST(Layer, fcLayer) { -// testFcLayer("", 4096 * 4096 * 2); -// testFcLayer("csc", 4096 * 40); -// testFcLayer("csr", 4096 * 40); -// } -// -// TEST(Layer, SelectiveFullyConnectedLayer) { -// TestConfig config; -// size_t nin = 16; -// size_t nout = 256; -// config.layerConfig.set_type("selective_fc"); -// config.layerConfig.set_size(nout); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_has_selected_colums(true); -// config.layerConfig.set_selective_fc_pass_generation(false); -// config.biasSize = nout; -// -// config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back( -// {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", -// true)}); -// config.layerConfig.add_inputs(); -// -// testLayerGrad(config, -// "selective_fc", -// 100, -// /* trans= */ false, -// /* useGup= */ false, -// false); -// #ifndef PADDLE_ONLY_CPU -// testLayerGrad(config, -// "selective_fc", -// 100, -// /* trans= */ false, -// /* useGup= */ true, -// false); -// #endif -// } -// -// TEST(Layer, DataNormLayer) { -// TestConfig config; -// 
config.layerConfig.set_type("data_norm"); -// config.layerConfig.set_size(20); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100}); -// config.inputDefs.back().isStatic = true; -// config.layerConfig.add_inputs(); -// -// for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) { -// config.layerConfig.set_data_norm_strategy(strategy); -// // The parameters are static, so not support GPU now -// testLayerGrad(config, -// "data_norm", -// 200, -// /* trans */ false, -// /* useGpu */ false); -// } -// } -// -// TEST(Layer, hsigmoidLayer) { -// TestConfig config; -// config.layerConfig.set_type("hsigmoid"); -// config.layerConfig.set_num_classes(5); -// config.layerConfig.set_size(1); -// config.biasSize = config.layerConfig.num_classes() - 1; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, -// "hsigmoid", -// 100, -// /* trans */ false, /* useGpu */ -// false); -// } -// -// TEST(Layer, multi_cross) { -// TestConfig config; -// config.layerConfig.set_type("multi-class-cross-entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad( -// config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, multi_binary_label_sparse_mat) { -// TestConfig config; -// config.layerConfig.set_type("multi_binary_label_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "multi_binary_label_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(layer, multi_binary_label_id) { -// TestConfig config; -// config.layerConfig.set_type("multi_binary_label_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "multi_binary_label_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(Layer, multi_cross_with_selfnorm) { -// TestConfig config; -// config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); -// config.layerConfig.set_softmax_selfnorm_alpha(0.1); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, -// "multi_class_cross_entropy_with_selfnorm", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, multi_cross_soft) { -// TestConfig config; -// config.layerConfig.set_type("soft_binary_class_cross_entropy"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, 
"layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, -// "soft_binary_class_cross_entropy", -// 100, -// /* trans */ false, -// useGpu); -// } -// } -// -// TEST(Layer, square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, sparse_square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // "GpuSparseMatrix" as label is not supported -// testLayerGrad(config, -// "square_error", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, sparse_float_square_error) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); -// config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // "GpuSparseMatrix" as label is not supported -// testLayerGrad(config, -// "square_error", -// 100, -// /* trans */ false, -// /* useGpu */ false); -// } -// -// TEST(Layer, square_error_weighted) { -// TestConfig config; -// config.layerConfig.set_type("square_error"); -// config.biasSize = 0; -// config.testAccumulate = false; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); -// } -// } -// -// TEST(Layer, huber_two_class) { -// TestConfig config; -// config.layerConfig.set_type("huber"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); -// } -// } -// -// void testExpandLayer(string trans_type, bool hasSubseq) { -// TestConfig config; -// config.layerConfig.set_type("expand"); -// -// config.inputDefs.push_back( -// {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, -// "layer_0", -// 10, -// 0}); -// config.inputDefs.push_back( -// {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, -// "layer_1", -// 10, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_trans_type(trans_type); -// LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "expand", 30, false, useGpu); -// } -// } -// -// TEST(Layer, ExpandLayer) { -// testExpandLayer("non-seq", false); // non-seq expand to seq -// testExpandLayer("non-seq", true); // non-seq expand to hasSubseq -// testExpandLayer("seq", true); // seq expand to hasSubseq -// } -// -// void testDegradeLayer(bool hasSubseq, -// string layer_type, -// string trans_type, -// int stride) { -// TestConfig config; -// config.layerConfig.set_type(layer_type); -// config.layerConfig.set_size(10); -// config.layerConfig.set_seq_pool_stride(stride); -// config.biasSize = 0; -// -// config.inputDefs.push_back( -// {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, -// "layer_0", -// 10, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_trans_type(trans_type); -// -// auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) { -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, layer_type, 100, false, useGpu); -// } -// }; -// -// if (layer_type == "average") { -// for (auto strategy : {"average", "sum", "squarerootn"}) { -// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type -// << " average_strategy=" << strategy -// << " seq_pool_stride=" << stride; -// config.layerConfig.set_average_strategy(strategy); -// testDegradeLayerGrad(config, layer_type); -// } -// } else { -// LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type -// << " seq_pool_stride=" << stride; -// testDegradeLayerGrad(config, layer_type); -// } -// } -// -// TEST(Layer, MaxLayer) { -// testDegradeLayer(false, "max", "non-seq", -1); // seq max to non-seq -// testDegradeLayer(false, -// "max", -// "non-seq", -// 5); // seq max to a shorten seq, stride window = 5 -// testDegradeLayer(true, "max", "non-seq", -1); // hasSubseq max to non-seq -// testDegradeLayer(true, "max", "seq", -1); // hasSubseq max to seq -// } -// -// TEST(Layer, SequenceLastInstanceLayer) { -// testDegradeLayer(false, -// "seqlastins", -// "non-seq", -// -1); // seq seqlastins to non-seq -// testDegradeLayer(false, -// "seqlastins", -// "non-seq", -// 5); // seq seqlastins to a shorten seq, stride window = 5 -// testDegradeLayer(true, -// "seqlastins", -// "non-seq", -// -1); // hasSubseq seqlastins to non-seq -// testDegradeLayer( -// true, "seqlastins", "seq", -1); // hasSubseq seqlastins to seq -// } -// -// TEST(Layer, AverageLayer) { -// testDegradeLayer(false, "average", "non-seq", -1); // seq average to -// non-seq -// testDegradeLayer(false, -// "average", -// "non-seq", -// 5); // seq average to a shorten seq, stride window = 5 -// testDegradeLayer( -// true, "average", "non-seq", -1); // hasSubseq average to -// non-seq -// testDegradeLayer(true, "average", "seq", -1); // hasSubseq average to seq -// } -// -// TEST(Layer, SequenceConcatLayer) { -// TestConfig config; -// config.layerConfig.set_type("seqconcat"); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for 
(auto useGpu : {false, true}) { -// testLayerGrad(config, "seqconcat", 100, false, useGpu); -// } -// } -// -// TEST(Layer, SequenceReshapeLayer) { -// TestConfig config; -// config.layerConfig.set_type("seqreshape"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "seqreshape", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ConvShiftLayer) { -// TestConfig config; -// config.layerConfig.set_type("conv_shift"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// // Not support GPU now -// testLayerGrad(config, "conv_shift", 100, false, false); -// } -// -// TEST(Layer, PowerLayer) { -// TestConfig config; -// config.layerConfig.set_type("power"); -// config.layerConfig.set_size(10); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "power", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ConvexCombinationLayer) { -// TestConfig config; -// config.layerConfig.set_type("convex_comb"); -// config.layerConfig.set_size(20); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "convex_comb", 100, false, useGpu); -// } -// } -// -// TEST(Layer, InterpolationLayer) { -// TestConfig config; -// config.layerConfig.set_type("interpolation"); -// config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "interpolation", 100, false, useGpu); -// } -// } -// -// TEST(Layer, OuterProdLayer) { -// TestConfig config; -// config.layerConfig.set_type("out_prod"); -// config.layerConfig.set_size(100); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "out_prod", 100, false, useGpu); -// } -// } -// -// TEST(Layer, SlopeInterceptLayer) { -// TestConfig config; -// config.layerConfig.set_type("slope_intercept"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_slope(1.0); -// config.layerConfig.set_intercept(0.1); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "slope_intercept", 100, false, useGpu); -// } -// } -// -// TEST(Layer, ScalingLayer) { -// TestConfig config; -// config.layerConfig.set_type("scaling"); -// 
config.layerConfig.set_size(10); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.layerConfig.add_inputs(); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "scaling", 100, false, useGpu); -// } -// } -// -// void testNormLayer(const string& normType, bool trans, bool useGpu) { -// TestConfig config; -// config.layerConfig.set_type("norm"); -// config.layerConfig.set_active_type("relu"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// NormConfig* norm = input->mutable_norm_conf(); -// norm->set_norm_type(normType); -// norm->set_channels(16); -// norm->set_size(5); -// norm->set_scale(0.001); -// norm->set_pow(0.75); -// norm->set_blocked(0); -// norm->set_img_size(14); -// norm->set_img_size_y(7); -// norm->set_output_x(norm->img_size()); -// norm->set_output_y(norm->img_size_y()); -// if (norm->norm_type() == "cmrnorm" || -// norm->norm_type() == "cmrnorm-projection") { -// norm->set_scale(norm->scale() / norm->size()); -// } else { -// norm->set_scale(norm->scale() / (norm->size() * norm->size())); -// } -// -// config.layerConfig.set_size(norm->output_x() * norm->output_y() * -// norm->channels()); -// config.biasSize = 0; -// -// testLayerGrad(config, "norm", 100, trans, useGpu); -// } -// -// TEST(Layer, NormLayer) { -// testNormLayer("cmrnorm-projection", -// /* trans= */ false, /* useGpu= */ -// true); -// testNormLayer("cmrnorm-projection", -// /* trans= */ false, /* useGpu= */ -// false); -// } -// -// void setPoolConfig(TestConfig* config, -// PoolConfig* pool, -// const string& poolType) { -// (*config).biasSize = 0; -// (*config).layerConfig.set_type("pool"); -// (*config).layerConfig.set_num_filters(16); -// -// int kw = 3, kh = 3; -// int pw = 0, ph = 0; -// int sw = 2, sh = 2; -// pool->set_pool_type(poolType); -// pool->set_channels(16); -// pool->set_size_x(kw); -// pool->set_size_y(kh); -// pool->set_start(0); -// pool->set_padding(pw); -// pool->set_padding_y(ph); -// pool->set_stride(sw); -// pool->set_stride_y(sh); -// -// int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); -// int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); -// pool->set_output_x(ow); -// pool->set_output_y(oh); -// } -// -// void testPoolLayer(const string& poolType, bool trans, bool useGpu) { -// TestConfig config; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PoolConfig* pool = input->mutable_pool_conf(); -// -// pool->set_img_size(14); -// pool->set_img_size_y(14); -// setPoolConfig(&config, pool, poolType); -// config.layerConfig.set_size(pool->output_x() * pool->output_y() * -// pool->channels()); -// -// testLayerGrad(config, "pool", 100, trans, useGpu); -// } -// -// #ifndef PADDLE_ONLY_CPU -// void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { -// TestConfig config; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PoolConfig* pool = input->mutable_pool_conf(); -// -// pool->set_size_y(4); -// pool->set_stride_y(3); -// pool->set_img_size(10); -// pool->set_img_size_y(20); -// setPoolConfig(&config, pool, poolType); -// pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / 
-// ((float)pool->stride_y()) + -// 1.5); -// config.layerConfig.set_size(pool->output_x() * pool->output_y() * -// pool->channels()); -// -// testLayerGrad(config, "pool", 100, trans, useGpu); -// } -// #endif -// -// TEST(Layer, PoolLayer) { -// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); -// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); -// -// #ifndef PADDLE_ONLY_CPU -// testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); -// testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); -// #endif -// } -// -// void testSppLayer(const string& poolType, -// const int pyramidHeight, -// bool trans, -// bool useGpu) { -// TestConfig config; -// config.layerConfig.set_type("spp"); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// SppConfig* sppConfig = input->mutable_spp_conf(); -// sppConfig->set_pool_type(poolType); -// sppConfig->set_pyramid_height(pyramidHeight); -// ImageConfig* imageConfig = sppConfig->mutable_image_conf(); -// imageConfig->set_channels(16); -// imageConfig->set_img_size(10); -// imageConfig->set_img_size_y(20); -// int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); -// config.layerConfig.set_size(outputSize * imageConfig->channels()); -// testLayerGrad(config, "spp", 100, trans, useGpu); -// } -// -// TEST(Layer, SpatialPyramidPoolLayer) { -// for (auto useGpu : {false, true}) { -// for (auto pyramidHeight : {1, 2, 3}) { -// testSppLayer("avg-projection", pyramidHeight, false, useGpu); -// testSppLayer("max-projection", pyramidHeight, false, useGpu); -// } -// } -// } -// -// TEST(Layer, rankCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("rank-cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "rank-cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, sumCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("sum_cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "sum_cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, weightedRankCostLayer) { -// TestConfig config; -// config.layerConfig.set_type("rank-cost"); -// config.biasSize = 0; -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// 
testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); -// } -// } -// -// TEST(Layer, TensorLayer) { -// TestConfig config; -// config.layerConfig.set_type("tensor"); -// config.layerConfig.set_size(10); -// config.layerConfig.set_active_type("sigmoid"); -// config.biasSize = config.layerConfig.size(); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "tensor", 100, false, useGpu); -// } -// } -// -// TEST(Layer, RecurrentLayer) { -// TestConfig config; -// config.layerConfig.set_type("recurrent"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("tanh"); -// config.biasSize = 4; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); -// } -// } -// } -// -// TEST(Layer, LstmLayer) { -// TestConfig config; -// config.layerConfig.set_type("lstmemory"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("tanh"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 28; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "lstmemory", 100, /* trans= */ false, useGpu); -// } -// } -// for (auto useGpu : {true}) { -// config.testBatchState = true; -// config.layerConfig.set_reversed(false); -// testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, MDLstmLayer) { -// TestConfig config; -// config.layerConfig.set_type("mdlstmemory"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 4 * 9; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_directions(true); -// config.layerConfig.add_directions(true); -// -// for (auto useGpu : {false, true}) { -// for (int i = 0; i < 2; i++) { -// for (int j = 0; j < 2; j++) { -// config.layerConfig.set_directions(0, bool(i)); -// config.layerConfig.set_directions(1, bool(j)); -// testLayerGrad(config, "mdlstmemory", 100, false, useGpu); -// } -// } -// } -// } -// -// TEST(Layer, ParameterReluLayer) { -// auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { -// TestConfig config; -// config.layerConfig.set_type("prelu"); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); -// config.layerConfig.add_inputs(); -// config.layerConfig.set_size(inputSize); -// config.layerConfig.set_partial_sum(inputSize / -// channels); // size of feature map -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "prelu", 100, false, 
useGpu); -// } -// }; -// -// testParameterReluLayer(192, 1); -// testParameterReluLayer(192, 3); -// testParameterReluLayer(192, 192); -// } -// -// TEST(Layer, ResizeLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("resize"); -// config.layerConfig.set_size(64); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "resize", 100, false, useGpu); -// } -// } -// -// TEST(Layer, RotateLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("rotate"); -// const int CHANNEL = 2; -// const int HEIGHT = 8; -// const int WIDTH = 4; -// const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; -// config.layerConfig.set_size(INPUT_SIZE); -// config.layerConfig.set_height(HEIGHT); -// config.layerConfig.set_width(WIDTH); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "rotate", 100, false, useGpu); -// } -// } -// -// TEST(Layer, NCELayer) { -// TestConfig config; -// size_t numClasses = 4; -// config.layerConfig.set_type("nce"); -// config.layerConfig.set_size(1); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_num_classes(numClasses); -// config.biasSize = numClasses; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * -// numClasses}); -// config.inputDefs.push_back( -// {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto withWeight : {false, true}) { -// if (withWeight) { -// config.inputDefs.push_back( -// {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// } -// -// for (auto isIdLabel : {false, true}) { -// config.inputDefs[1] = { -// isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA, -// "label", -// /* dim= */ numClasses, -// /* paraSize= */ 0}; -// -// for (auto withDist : {false, true}) { -// config.layerConfig.clear_neg_sampling_dist(); -// if (withDist) { -// double sum = 0; -// for (size_t i = 0; i < numClasses; ++i) { -// real p = rand(); // NOLINT use rand_r -// config.layerConfig.add_neg_sampling_dist(p); -// sum += p; -// } -// for (size_t i = 0; i < numClasses; ++i) { -// real p = config.layerConfig.neg_sampling_dist(i) / sum; -// config.layerConfig.set_neg_sampling_dist(i, p); -// } -// } -// LOG(INFO) << "NCELayer " -// << " isIdLabel=" << isIdLabel << " withWeight=" << -// withWeight -// << " withDist=" << withDist; -// // Not support GPU now -// testLayerGrad(config, -// "nce", -// 100, -// /* trans= */ false, -// /* useGpu */ false); -// } -// } -// } -// } -// -// TEST(Layer, GatedRecurrentLayer) { -// TestConfig config; -// config.layerConfig.set_type("gated_recurrent"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// for (auto reversed : {false, true}) { -// config.layerConfig.set_reversed(reversed); -// config.testState = !reversed; -// testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, -// useGpu); -// } -// } -// } -// -// TEST(Layer, GruStepLayer) { -// TestConfig config; -// config.layerConfig.set_type("gru_step"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, LstmStepLayer) { -// TestConfig config; -// config.layerConfig.set_type("lstm_step"); -// config.layerConfig.set_size(4); -// config.layerConfig.set_active_type("sigmoid"); -// config.layerConfig.set_active_state_type("sigmoid"); -// config.layerConfig.set_active_gate_type("sigmoid"); -// config.biasSize = 12; -// config.testAccumulate = false; -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu); -// } -// } -// -// void testBatchNormLayer(const string& type, bool trans, bool useGpu) { -// TestConfig config; -// const int CHANNELS = 10; -// const int IMG_SIZE = 16; -// const int IMG_SIZE_Y = 8; -// size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y; -// config.layerConfig.set_type(type); -// config.layerConfig.set_size(size); -// config.layerConfig.set_active_type("sigmoid"); -// config.biasSize = CHANNELS; -// config.inputDefs.push_back({INPUT_DATA, -// "layer_0", -// /* dim= */ size, -// /* paraSize= */ CHANNELS}); -// -// config.inputDefs.push_back({INPUT_DATA, 
"layer_1_running_mean", 1, -// CHANNELS}); -// config.inputDefs.back().isStatic = true; -// config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, -// CHANNELS}); -// config.inputDefs.back().isStatic = true; -// -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// ImageConfig* img_conf = input->mutable_image_conf(); -// img_conf->set_channels(CHANNELS); -// img_conf->set_img_size(IMG_SIZE); -// img_conf->set_img_size_y(IMG_SIZE_Y); -// -// testLayerGrad(config, -// "batch_norm", -// 64, -// /* trans= */ trans, -// useGpu, -// /* useWeight */ true); -// } -// -// TEST(Layer, BatchNormalizationLayer) { -// testBatchNormLayer("batch_norm", false, false); -// #ifndef PADDLE_ONLY_CPU -// testBatchNormLayer("batch_norm", false, true); -// if (hl_get_cudnn_lib_version() >= int(4000)) { -// testBatchNormLayer("cudnn_batch_norm", false, true); -// } -// #endif -// } -// -// void testConvOperator(bool isDeconv) { -// TestConfig config; -// const int NUM_FILTERS = 16; -// const int FILTER_SIZE = 2; -// const int FILTER_SIZE_Y = 3; -// const int CHANNELS = 3; -// const int IMAGE_SIZE = 16; -// const int IMAGE_SIZE_Y = 9; -// OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); -// if (isDeconv) { -// operatorConf.set_type("convt"); -// } else { -// operatorConf.set_type("conv"); -// } -// ConvConfig* conv = operatorConf.mutable_conv_conf(); -// operatorConf.set_num_filters(NUM_FILTERS); -// conv->set_filter_size(FILTER_SIZE); -// conv->set_filter_size_y(FILTER_SIZE_Y); -// conv->set_channels(CHANNELS); -// conv->set_padding(0); -// conv->set_padding_y(1); -// conv->set_stride(2); -// conv->set_stride_y(2); -// conv->set_groups(1); -// conv->set_img_size(IMAGE_SIZE); -// conv->set_img_size_y(IMAGE_SIZE_Y); -// conv->set_output_x(outputSize(conv->img_size(), -// conv->filter_size(), -// conv->padding(), -// conv->stride(), -// /* caffeMode */ true)); -// conv->set_output_y(outputSize(conv->img_size_y(), -// conv->filter_size_y(), -// conv->padding_y(), -// conv->stride_y(), -// /* caffeMode */ true)); -// -// if (isDeconv) { -// conv->set_filter_channels(NUM_FILTERS / conv->groups()); -// config.inputDefs.push_back({INPUT_DATA, -// "layer_0", -// conv->output_x() * conv->output_y() * -// CHANNELS, -// 0}); -// config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); -// } else { -// conv->set_filter_channels(conv->channels() / conv->groups()); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); -// config.layerConfig.set_size(conv->output_x() * conv->output_y() * -// NUM_FILTERS); -// } -// -// config.inputDefs.push_back( -// {INPUT_DATA, -// "layer_1", -// FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, -// 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); -// } -// -// TEST(Operator, conv) { -// testConvOperator(/*isDeconv*/ true); -// testConvOperator(/*isDeconv*/ false); -// } -// -// TEST(Layer, FeatureMapExpandLayer) { -// TestConfig config; -// config.layerConfig.set_type("featmap_expand"); -// const int CHANNELS = 10; -// const int INPUT_SIZE = 100; -// config.layerConfig.set_size(INPUT_SIZE * CHANNELS); -// config.layerConfig.set_num_filters(CHANNELS); -// config.inputDefs.push_back({INPUT_SEQUENCE_DATA, -// "layer_0", -// /* dim= */ INPUT_SIZE, -// /* paraSize= */ 0}); -// 
config.layerConfig.add_inputs(); -// for (auto useGpu : {false, true}) { -// for (auto asRowVec : {false, true}) { -// config.layerConfig.set_user_arg(asRowVec ? "as_row_vec" : -// "as_col_vec"); -// testLayerGrad(config, -// "featmap_expand", -// /*batch_size*/ 100, -// /* trans= */ false, -// useGpu, -// /* useWeight */ true); -// } -// } -// } -// -// TEST(Layer, MultiplexLayer) { -// TestConfig config; -// const int LAYER_SIZE = 100; -// config.layerConfig.set_type("multiplex"); -// config.layerConfig.set_size(LAYER_SIZE); -// -// config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, PadLayer) { -// TestConfig config; -// config.biasSize = 0; -// config.layerConfig.set_type("pad"); -// -// int c = 4; -// int h = 31; -// int w = 36; -// size_t size = c * h * w; -// config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// PadConfig* pad = input->mutable_pad_conf(); -// ImageConfig* image = pad->mutable_image_conf(); -// -// image->set_channels(c); -// image->set_img_size(h); -// image->set_img_size_y(w); -// pad->add_pad_c(1); -// pad->add_pad_c(2); -// pad->add_pad_h(2); -// pad->add_pad_h(3); -// pad->add_pad_w(3); -// pad->add_pad_w(5); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "pad", 10, false, useGpu); -// } -// } -// -// TEST(Layer, CrossChannelNormLayer) { -// TestConfig config; -// config.paramInitialMean = 1.; -// config.paramInitialStd = 0.; -// config.layerConfig.set_type("norm"); -// config.layerConfig.set_size(100); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// NormConfig* norm = input->mutable_norm_conf(); -// norm->set_norm_type("cross-channel-norm"); -// norm->set_channels(10); -// norm->set_size(100); -// norm->set_scale(0); -// norm->set_pow(0); -// norm->set_blocked(0); -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); -// } -// } -// -// TEST(Layer, smooth_l1) { -// TestConfig config; -// config.layerConfig.set_type("smooth_l1"); -// -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); -// config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); -// } -// } -// -// TEST(Layer, multibox_loss) { -// TestConfig config; -// config.layerConfig.set_type("multibox_loss"); -// config.biasSize = 0; -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); -// multiboxLoss->set_num_classes(21); -// multiboxLoss->set_input_num(1); -// multiboxLoss->set_overlap_threshold(0.5); -// multiboxLoss->set_neg_pos_ratio(3); -// multiboxLoss->set_neg_overlap(0.5); -// multiboxLoss->set_background_id(0); -// multiboxLoss->set_height(3); -// multiboxLoss->set_width(3); -// -// size_t gtNum = 
1; -// MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); -// labelValue->randomizeUniform(); -// labelValue->add(-0.5); -// labelValue->sigmoid(*labelValue); -// real* labelData = labelValue->getData(); -// size_t labelWidth = labelValue->getWidth(); -// for (size_t i = 0; i < gtNum; ++i) { -// *(labelData + i * labelWidth) = std::rand() % 20 + 1; -// *(labelData + i * labelWidth + 1) = 0.400259; -// *(labelData + i * labelWidth + 2) = 0.377857; -// *(labelData + i * labelWidth + 3) = 0.525712; -// *(labelData + i * labelWidth + 4) = 0.519368; -// } -// vector seqStartPositions(gtNum + 1, 0); -// for (size_t i = 1; i <= gtNum; ++i) { -// seqStartPositions[i] = i; -// } -// -// // Ensure at lease one matched bbox -// MatrixPtr priorValue = Matrix::create(1, 72, false, false); -// priorValue->randomizeUniform(); -// priorValue->add(-0.5); -// priorValue->sigmoid(*priorValue); -// real* priorData = priorValue->getData(); -// *(priorData) = 0.424811; -// *(priorData + 1) = 0.397059; -// *(priorData + 2) = 0.538905; -// *(priorData + 3) = 0.447091; -// *(priorData + 4) = 0.425720; -// *(priorData + 5) = 0.515228; -// *(priorData + 6) = 0.519452; -// *(priorData + 7) = 0.591065; -// -// config.inputDefs.push_back( -// {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}}); -// config.inputDefs.push_back( -// {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions}); -// config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0}); -// config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0}); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "multibox_loss", 1, false, useGpu, false); -// } -// } -// -// TEST(Layer, TransLayer) { -// TestConfig config; -// const int height = 128; -// const int width = 1028; -// config.layerConfig.set_type("trans"); -// config.layerConfig.set_size(width); -// -// config.inputDefs.push_back( -// {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0}); -// config.layerConfig.add_inputs(); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "trans", height, /* trans= */ false, useGpu); -// } -// } -// -// TEST(Layer, RowConvLayer) { -// const int context = 3; -// const int size = 512; -// -// TestConfig config; -// config.layerConfig.set_type("row_conv"); -// config.layerConfig.set_size(size); -// config.layerConfig.set_active_type("sigmoid"); -// -// config.inputDefs.push_back( -// {INPUT_SEQUENCE_DATA, "layer_0", size, context * size}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// RowConvConfig* conv = input->mutable_row_conv_conf(); -// conv->set_context_length(context); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "row_conv", 100, false, useGpu, false); -// } -// } -// -// TEST(Layer, CropLayer) { -// TestConfig config; -// // config input_0 -// config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0}); -// LayerInputConfig* input = config.layerConfig.add_inputs(); -// ImageConfig* img = input->mutable_image_conf(); -// img->set_channels(4); -// img->set_img_size(16); -// config.layerConfig.set_axis(2); -// config.layerConfig.add_offset(0); -// config.layerConfig.add_offset(0); -// -// // config input_1 -// config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0}); -// input = config.layerConfig.add_inputs(); -// img = input->mutable_image_conf(); -// img->set_channels(2); -// img->set_img_size(8); -// -// // config crop layer -// 
config.layerConfig.set_type("crop"); -// config.layerConfig.set_name("cropLayer"); -// -// for (auto useGpu : {false, true}) { -// testLayerGrad(config, "crop", 100, false, useGpu, false); -// } -// } +TEST(Operator, dot_mul) { + TestConfig config; + config.layerConfig.set_size(10); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); + operatorConf.set_type("dot_mul"); + operatorConf.set_dotmul_scale(-1); + + testOperatorGrad(config, operatorConf, 100, false, false); +} + +TEST(Projection, context) { + for (auto contextStart : {-5, -3, -1, 0, 3}) { + for (auto contextLength : {1, 2, 5, 7}) { + for (auto batchSize : {1, 2, 5, 20, 50}) { + for (auto trainablePadding : {false, true}) { + LOG(INFO) << " contextStart=" << contextStart + << " contextLength=" << contextLength + << " batchSize=" << batchSize + << " trainablePadding=" << trainablePadding; + ProjectionConfig conf; + conf.set_type("context"); + conf.set_input_size(10); + conf.set_context_start(contextStart); + conf.set_context_length(contextLength); + conf.set_trainable_padding(trainablePadding); + conf.set_output_size(conf.context_length() * conf.input_size()); + int pad = + std::max(0, -conf.context_start()) + + std::max(0, conf.context_start() + conf.context_length() - 1); + for (auto useGpu : {false, true}) { + testProjectionGrad( + conf, + INPUT_SEQUENCE_DATA, + trainablePadding ? conf.input_size() * pad : 0, + batchSize, + useGpu, + contextStart + contextLength <= 1); // = testState + } + } + } + } + } +} + +TEST(Projection, trans_fc) { + ProjectionConfig conf; + conf.set_type("trans_fc"); + conf.set_input_size(50); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 1000, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, fc) { + ProjectionConfig conf; + conf.set_type("fc"); + conf.set_input_size(10); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 200, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, dot_mul) { + ProjectionConfig conf; + conf.set_type("dot_mul"); + conf.set_input_size(20); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 20, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, table) { + ProjectionConfig conf; + conf.set_type("table"); + conf.set_input_size(10); + conf.set_output_size(20); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_LABEL, + /* parameterSize */ 200, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, identity) { + ProjectionConfig conf; + conf.set_type("identity"); + conf.set_input_size(10); + conf.set_output_size(10); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 0, + /* batchSize */ 100, + useGpu); + } +} + +TEST(Projection, slice) { + ProjectionConfig conf; + conf.set_type("slice"); + conf.set_input_size(100); + SliceConfig& slice1 = *conf.add_slices(); + slice1.set_start(10); + slice1.set_end(20); + SliceConfig& slice2 = *conf.add_slices(); + slice2.set_start(50); + slice2.set_end(70); + conf.set_output_size(30); + for (auto useGpu : {false, true}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 0, + 
/* batchSize */ 10, + useGpu); + } +} + +TEST(Projection, scaling) { + ProjectionConfig conf; + conf.set_type("scaling"); + conf.set_input_size(10); + conf.set_output_size(10); + for (auto useGpu : {false}) { + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ 1, + /* batchSize */ 100, + useGpu); + } +} + +void testProjectionConv(size_t groups, bool isDeconv) { + const int NUM_FILTERS = 18; + const int FILTER_SIZE = 2; + const int FILTER_SIZE_Y = 4; + const int CHANNELS = 3; + const int IMAGE_SIZE = 16; + + ProjectionConfig conf; + if (isDeconv) { + conf.set_type("convt"); + } else { + conf.set_type("conv"); + } + conf.set_num_filters(NUM_FILTERS); + + ConvConfig* conv = conf.mutable_conv_conf(); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_channels(CHANNELS); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(groups); + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + } + conv->set_img_size(IMAGE_SIZE); + int output_x = outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true); + int output_y = outputSize(conv->img_size(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true); + conv->set_output_x(output_x); + conv->set_output_y(output_y); + if (isDeconv) { + conf.set_input_size(output_x * output_y * CHANNELS); + conf.set_output_size(IMAGE_SIZE * IMAGE_SIZE * NUM_FILTERS); + } else { + conf.set_input_size(IMAGE_SIZE * IMAGE_SIZE * CHANNELS); + conf.set_output_size(output_x * output_y * NUM_FILTERS); + } + + testProjectionGrad(conf, + INPUT_DATA, + /* parameterSize */ NUM_FILTERS * CHANNELS * FILTER_SIZE * + FILTER_SIZE_Y / groups, + /* batchSize */ 100, + true, + false, + NUM_FILTERS, + true); +} + +#ifndef PADDLE_ONLY_CPU +TEST(Projection, conv) { + /// test ConvProjection + testProjectionConv(1, false); + testProjectionConv(3, false); + /// test ConvTransProjection + testProjectionConv(1, true); + testProjectionConv(3, true); +} +#endif + +TEST(Layer, BilinearInterpLayer) { + TestConfig config; + config.layerConfig.set_type("bilinear_interp"); + config.biasSize = 0; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0}); + + LayerInputConfig* input = config.layerConfig.add_inputs(); + BilinearInterpConfig* bilinear = input->mutable_bilinear_interp_conf(); + ImageConfig* image = bilinear->mutable_image_conf(); + image->set_img_size(32); + image->set_img_size_y(32); + image->set_channels(4); + + for (auto useGpu : {false, true}) { + for (auto outSize : {32, 64}) { + bilinear->set_out_size_x(outSize); + bilinear->set_out_size_y(outSize); + testLayerGrad(config, "bilinear_interp", 10, false, useGpu); + } + } +} + +TEST(Layer, concat) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("concat"); + config.layerConfig.set_size(15); + config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "concat", 100, false, useGpu); + } +} + +TEST(Layer, AddtoLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("addto"); + config.layerConfig.set_size(10); + 
config.layerConfig.set_active_type("sigmoid"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "addto", 100, false, useGpu); + } +} + +TEST(Layer, CTCLayer) { + TestConfig config; + config.layerConfig.set_type("ctc"); + config.layerConfig.set_norm_by_times(false); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_SEQUENCE_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "ctc", + 100, + /* trans */ false, /* useGpu */ + useGpu); + } +} + +TEST(Layer, cosSimLayer) { + TestConfig config; + config.layerConfig.set_type("cos"); + config.layerConfig.set_size(1); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cos", 100, false, useGpu); + } +} + +TEST(Layer, CosSimVecMatLayer) { + TestConfig config; + config.layerConfig.set_type("cos_vm"); + config.layerConfig.set_size(5); // output size + config.layerConfig.set_cos_scale(2.0); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cos_vm", 100, false, useGpu); + } +} + +void testDepthwiseConvLayer(const string& type, bool useGpu) { + TestConfig config; + config.biasSize = 32; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(32); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 2048, 192}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(16); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_img_size_y(8); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "depthwise_conv", 100, false, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "depthwise_conv", 2, false, useGpu, true, 0.02); +} + +TEST(Layer, depthwiseConvLayer) { + // 'depthwise_conv' is a sepecial case of 'exconv' whose + // groups size equals to the input channels size. 
+ testDepthwiseConvLayer("exconv", /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testDepthwiseConvLayer("exconv", /* useGpu= */ true); +#endif +} + +void testConvLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 16; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(16); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 384, 288}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(3); + conv->set_channels(3); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(conv->channels() / conv->groups()); + conv->set_img_size(16); + conv->set_img_size_y(8); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "conv", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "conv", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convLayer) { + testConvLayer("exconv", /* trans= */ false, /* useGpu= */ false); +#ifndef PADDLE_ONLY_CPU + testConvLayer("exconv", /* trans= */ false, /* useGpu= */ true); + testConvLayer("cudnn_conv", /* trans= */ false, /* useGpu= */ true); +#endif +} + +void testConvTransLayer(const string& type, bool trans, bool useGpu) { + TestConfig config; + config.biasSize = 3; + config.layerConfig.set_type(type); + config.layerConfig.set_num_filters(3); + config.layerConfig.set_partial_sum(1); + config.layerConfig.set_shared_biases(true); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 384}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + ConvConfig* conv = input->mutable_conv_conf(); + conv->set_filter_size(2); + conv->set_filter_size_y(4); + conv->set_channels(16); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_filter_channels(3 / conv->groups()); + conv->set_img_size(16); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + + config.layerConfig.set_size(conv->img_size() * conv->img_size() * + config.layerConfig.num_filters()); + + testLayerGrad(config, "convTrans", 100, trans, useGpu); + // Use small batch_size and useWeight=true to test biasGrad + testLayerGrad(config, "convTrans", 2, trans, useGpu, true, 0.02); +} + +TEST(Layer, convTransLayer) { + for (auto useGpu : {false, true}) { + testConvTransLayer("exconvt", /* trans= */ false, /* useGpu= */ useGpu); + } +#ifndef PADDLE_ONLY_CPU + testConvTransLayer("cudnn_convt", /* trans= */ false, /* useGpu= */ true); +#endif +} + +TEST(Layer, blockExpandLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("blockexpand"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 6144, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + BlockExpandConfig* blockExpand = input->mutable_block_expand_conf(); + 
blockExpand->set_img_size_x(64);
+  blockExpand->set_img_size_y(32);
+  blockExpand->set_channels(3);
+  blockExpand->set_padding_x(0);
+  blockExpand->set_padding_y(0);
+  blockExpand->set_block_x(4);
+  blockExpand->set_block_y(32);
+  blockExpand->set_stride_x(2);
+  blockExpand->set_stride_y(2);
+  blockExpand->set_output_x(outputSize(blockExpand->img_size_x(),
+                                       blockExpand->block_x(),
+                                       blockExpand->padding_x(),
+                                       blockExpand->stride_x(),
+                                       /* caffeMode */ false));
+  blockExpand->set_output_y(outputSize(blockExpand->img_size_y(),
+                                       blockExpand->block_y(),
+                                       blockExpand->padding_y(),
+                                       blockExpand->stride_y(),
+                                       /* caffeMode */ false));
+  config.layerConfig.set_size(blockExpand->block_x() * blockExpand->block_y() *
+                              blockExpand->channels());
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "blockexpand", 100, false, useGpu);
+  }
+}
+
+TEST(Layer, maxoutLayer) {
+  TestConfig config;
+  config.biasSize = 0;
+  config.layerConfig.set_type("maxout");
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 4096, 0});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  MaxOutConfig* maxout = input->mutable_maxout_conf();
+  ImageConfig* image = maxout->mutable_image_conf();
+
+  image->set_img_size(32);
+  image->set_img_size_y(32);
+  image->set_channels(4);
+  maxout->set_groups(2);
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "maxout", 10, false, useGpu);
+  }
+}
+void testFcLayer(string format, size_t nnz) {
+  TestConfig config;
+  config.biasSize = 4096;
+  config.layerConfig.set_type("fc");
+  config.layerConfig.set_size(4096);
+  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_drop_rate(0.1);
+
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_0", 8192, nnz, ParaSparse(format)});
+  config.layerConfig.add_inputs();
+
+  LOG(INFO) << config.inputDefs[0].sparse.sparse << " "
+            << config.inputDefs[0].sparse.format;
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config,
+                  "fc",
+                  100,
+                  /* trans */ false,
+                  useGpu,
+                  /* weight */ true);
+  }
+}
+
+TEST(Layer, fcLayer) {
+  testFcLayer("", 4096 * 4096 * 2);
+  testFcLayer("csc", 4096 * 40);
+  testFcLayer("csr", 4096 * 40);
+}
+
+TEST(Layer, SelectiveFullyConnectedLayer) {
+  TestConfig config;
+  size_t nin = 16;
+  size_t nout = 256;
+  config.layerConfig.set_type("selective_fc");
+  config.layerConfig.set_size(nout);
+  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_has_selected_colums(true);
+  config.layerConfig.set_selective_fc_pass_generation(false);
+  config.biasSize = nout;
+
+  config.inputDefs.push_back({INPUT_DATA, "input0", nin, nin * nout});
+  config.layerConfig.add_inputs();
+  config.inputDefs.push_back(
+      {INPUT_SPARSE_NON_VALUE_DATA, "index", nout, 0, ParaSparse("csr", true)});
+  config.layerConfig.add_inputs();
+
+  testLayerGrad(config,
+                "selective_fc",
+                100,
+                /* trans= */ false,
+                /* useGpu= */ false,
+                false);
+#ifndef PADDLE_ONLY_CPU
+  testLayerGrad(config,
+                "selective_fc",
+                100,
+                /* trans= */ false,
+                /* useGpu= */ true,
+                false);
+#endif
+}
+
+TEST(Layer, DataNormLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("data_norm");
+  config.layerConfig.set_size(20);
+  config.biasSize = 0;
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 20, 100});
+  config.inputDefs.back().isStatic = true;
+  config.layerConfig.add_inputs();
+
+  for (auto strategy : {"z-score", "min-max", "decimal-scaling"}) {
+    config.layerConfig.set_data_norm_strategy(strategy);
+    // The parameters are static, so this is not supported on GPU yet
+    testLayerGrad(config,
"data_norm", + 200, + /* trans */ false, + /* useGpu */ false); + } +} + +TEST(Layer, hsigmoidLayer) { + TestConfig config; + config.layerConfig.set_type("hsigmoid"); + config.layerConfig.set_num_classes(5); + config.layerConfig.set_size(1); + config.biasSize = config.layerConfig.num_classes() - 1; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 200}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 5, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // Not support GPU now + testLayerGrad(config, + "hsigmoid", + 100, + /* trans */ false, /* useGpu */ + false); +} + +TEST(Layer, multi_cross) { + TestConfig config; + config.layerConfig.set_type("multi-class-cross-entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad( + config, "multi-class-cross-entropy", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, multi_binary_label_sparse_mat) { + TestConfig config; + config.layerConfig.set_type("multi_binary_label_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "multi_binary_label_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(layer, multi_binary_label_id) { + TestConfig config; + config.layerConfig.set_type("multi_binary_label_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "multi_binary_label_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(Layer, multi_cross_with_selfnorm) { + TestConfig config; + config.layerConfig.set_type("multi_class_cross_entropy_with_selfnorm"); + config.layerConfig.set_softmax_selfnorm_alpha(0.1); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // Not support GPU now + testLayerGrad(config, + "multi_class_cross_entropy_with_selfnorm", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, multi_cross_soft) { + TestConfig config; + config.layerConfig.set_type("soft_binary_class_cross_entropy"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, + "soft_binary_class_cross_entropy", + 100, + /* trans */ false, + useGpu); + } +} + +TEST(Layer, square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, 
"square_error", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, sparse_square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_NON_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // "GpuSparseMatrix" as label is not supported + testLayerGrad(config, + "square_error", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, sparse_float_square_error) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 50, 0}); + config.inputDefs.push_back({INPUT_SPARSE_FLOAT_VALUE_DATA, "layer_1", 50, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + // "GpuSparseMatrix" as label is not supported + testLayerGrad(config, + "square_error", + 100, + /* trans */ false, + /* useGpu */ false); +} + +TEST(Layer, square_error_weighted) { + TestConfig config; + config.layerConfig.set_type("square_error"); + config.biasSize = 0; + config.testAccumulate = false; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 10, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "square_error", 100, /* trans */ false, useGpu); + } +} + +TEST(Layer, huber_two_class) { + TestConfig config; + config.layerConfig.set_type("huber"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_LABEL, "layer_1", 2, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "huber", 100, /* trans */ false, useGpu); + } +} + +void testExpandLayer(string trans_type, bool hasSubseq) { + TestConfig config; + config.layerConfig.set_type("expand"); + + config.inputDefs.push_back( + {trans_type == "non-seq" ? INPUT_DENSE_DIM_DATA : INPUT_SEQUENCE_DATA, + "layer_0", + 10, + 0}); + config.inputDefs.push_back( + {hasSubseq ? INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA, + "layer_1", + 10, + 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.set_trans_type(trans_type); + LOG(INFO) << " trans_type=" << trans_type << " hasSubseq=" << hasSubseq; + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "expand", 30, false, useGpu); + } +} + +TEST(Layer, ExpandLayer) { + testExpandLayer("non-seq", false); // non-seq expand to seq + testExpandLayer("non-seq", true); // non-seq expand to hasSubseq + testExpandLayer("seq", true); // seq expand to hasSubseq +} + +void testDegradeLayer(bool hasSubseq, + string layer_type, + string trans_type, + int stride) { + TestConfig config; + config.layerConfig.set_type(layer_type); + config.layerConfig.set_size(10); + config.layerConfig.set_seq_pool_stride(stride); + config.biasSize = 0; + + config.inputDefs.push_back( + {hasSubseq ? 
INPUT_HASSUB_SEQUENCE_DATA : INPUT_SEQUENCE_DATA,
+       "layer_0",
+       10,
+       0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.set_trans_type(trans_type);
+
+  auto testDegradeLayerGrad = [](TestConfig& config, string layer_type) {
+    for (auto useGpu : {false, true}) {
+      testLayerGrad(config, layer_type, 100, false, useGpu);
+    }
+  };
+
+  if (layer_type == "average") {
+    for (auto strategy : {"average", "sum", "squarerootn"}) {
+      LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+                << " average_strategy=" << strategy
+                << " seq_pool_stride=" << stride;
+      config.layerConfig.set_average_strategy(strategy);
+      testDegradeLayerGrad(config, layer_type);
+    }
+  } else {
+    LOG(INFO) << " hasSubseq=" << hasSubseq << " trans_type=" << trans_type
+              << " seq_pool_stride=" << stride;
+    testDegradeLayerGrad(config, layer_type);
+  }
+}
+
+TEST(Layer, MaxLayer) {
+  testDegradeLayer(false, "max", "non-seq", -1);  // seq max to non-seq
+  testDegradeLayer(false,
+                   "max",
+                   "non-seq",
+                   5);  // seq max to a shortened seq, stride window = 5
+  testDegradeLayer(true, "max", "non-seq", -1);  // hasSubseq max to non-seq
+  testDegradeLayer(true, "max", "seq", -1);      // hasSubseq max to seq
+}
+
+TEST(Layer, SequenceLastInstanceLayer) {
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   -1);  // seq seqlastins to non-seq
+  testDegradeLayer(false,
+                   "seqlastins",
+                   "non-seq",
+                   5);  // seq seqlastins to a shortened seq, stride window = 5
+  testDegradeLayer(true,
+                   "seqlastins",
+                   "non-seq",
+                   -1);  // hasSubseq seqlastins to non-seq
+  testDegradeLayer(
+      true, "seqlastins", "seq", -1);  // hasSubseq seqlastins to seq
+}
+
+TEST(Layer, AverageLayer) {
+  testDegradeLayer(false, "average", "non-seq", -1);  // seq average to non-seq
+  testDegradeLayer(false,
+                   "average",
+                   "non-seq",
+                   5);  // seq average to a shortened seq, stride window = 5
+  testDegradeLayer(
+      true, "average", "non-seq", -1);  // hasSubseq average to non-seq
+  testDegradeLayer(true, "average", "seq", -1);  // hasSubseq average to seq
+}
+
+TEST(Layer, SequenceConcatLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("seqconcat");
+  config.layerConfig.set_size(10);
+  config.biasSize = 0;
+
+  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 10, 0});
+  config.layerConfig.add_inputs();
+  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_1", 10, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "seqconcat", 100, false, useGpu);
+  }
+}
+
+TEST(Layer, SequenceReshapeLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("seqreshape");
+  config.layerConfig.set_size(10);
+
+  config.inputDefs.push_back({INPUT_SEQUENCE_DATA, "layer_0", 100, 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "seqreshape", 100, false, useGpu);
+  }
+}
+
+TEST(Layer, ConvShiftLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("conv_shift");
+  config.layerConfig.set_size(10);
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0});
+  config.inputDefs.push_back({INPUT_DATA, "layer_1", 3, 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  // Not supported on GPU yet
+  testLayerGrad(config, "conv_shift", 100, false, false);
+}
+
+TEST(Layer, PowerLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("power");
+  config.layerConfig.set_size(10);
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0});
+  config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0});
+
config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "power", 100, false, useGpu); + } +} + +TEST(Layer, ConvexCombinationLayer) { + TestConfig config; + config.layerConfig.set_type("convex_comb"); + config.layerConfig.set_size(20); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 100, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "convex_comb", 100, false, useGpu); + } +} + +TEST(Layer, InterpolationLayer) { + TestConfig config; + config.layerConfig.set_type("interpolation"); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_2", 10, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "interpolation", 100, false, useGpu); + } +} + +TEST(Layer, OuterProdLayer) { + TestConfig config; + config.layerConfig.set_type("out_prod"); + config.layerConfig.set_size(100); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "out_prod", 100, false, useGpu); + } +} + +TEST(Layer, SlopeInterceptLayer) { + TestConfig config; + config.layerConfig.set_type("slope_intercept"); + config.layerConfig.set_size(10); + config.layerConfig.set_slope(1.0); + config.layerConfig.set_intercept(0.1); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "slope_intercept", 100, false, useGpu); + } +} + +TEST(Layer, ScalingLayer) { + TestConfig config; + config.layerConfig.set_type("scaling"); + config.layerConfig.set_size(10); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 10, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "scaling", 100, false, useGpu); + } +} + +void testNormLayer(const string& normType, bool trans, bool useGpu) { + TestConfig config; + config.layerConfig.set_type("norm"); + config.layerConfig.set_active_type("relu"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1568, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + NormConfig* norm = input->mutable_norm_conf(); + norm->set_norm_type(normType); + norm->set_channels(16); + norm->set_size(5); + norm->set_scale(0.001); + norm->set_pow(0.75); + norm->set_blocked(0); + norm->set_img_size(14); + norm->set_img_size_y(7); + norm->set_output_x(norm->img_size()); + norm->set_output_y(norm->img_size_y()); + if (norm->norm_type() == "cmrnorm" || + norm->norm_type() == "cmrnorm-projection") { + norm->set_scale(norm->scale() / norm->size()); + } else { + norm->set_scale(norm->scale() / (norm->size() * norm->size())); + } + + config.layerConfig.set_size(norm->output_x() * norm->output_y() * + norm->channels()); + config.biasSize = 0; + + testLayerGrad(config, "norm", 100, trans, useGpu); +} + 
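+// Note: "cmrnorm" above is presumably AlexNet-style local response
+// normalization across channel maps, roughly
+//   out = in / (1 + (scale / n) * sum_over_window(in^2))^pow,
+// which is why testNormLayer divides the raw scale (0.001) by the
+// window size norm->size() = 5 before running, with the usual
+// beta = 0.75 as pow.
+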
+TEST(Layer, NormLayer) { + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + true); + testNormLayer("cmrnorm-projection", + /* trans= */ false, /* useGpu= */ + false); +} + +void setPoolConfig(TestConfig* config, + PoolConfig* pool, + const string& poolType) { + (*config).biasSize = 0; + (*config).layerConfig.set_type("pool"); + (*config).layerConfig.set_num_filters(16); + + int kw = 3, kh = 3; + int pw = 0, ph = 0; + int sw = 2, sh = 2; + pool->set_pool_type(poolType); + pool->set_channels(16); + pool->set_size_x(kw); + pool->set_size_y(kh); + pool->set_start(0); + pool->set_padding(pw); + pool->set_padding_y(ph); + pool->set_stride(sw); + pool->set_stride_y(sh); + + int ow = outputSize(pool->img_size(), kw, pw, sw, /* caffeMode */ false); + int oh = outputSize(pool->img_size_y(), kh, ph, sh, /* caffeMode */ false); + pool->set_output_x(ow); + pool->set_output_y(oh); +} + +void testPoolLayer(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3136, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + pool->set_img_size(14); + pool->set_img_size_y(14); + setPoolConfig(&config, pool, poolType); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool", 100, trans, useGpu); +} + +#ifndef PADDLE_ONLY_CPU +void testPoolLayer2(const string& poolType, bool trans, bool useGpu) { + TestConfig config; + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PoolConfig* pool = input->mutable_pool_conf(); + + pool->set_size_y(4); + pool->set_stride_y(3); + pool->set_img_size(10); + pool->set_img_size_y(20); + setPoolConfig(&config, pool, poolType); + pool->set_output_y((pool->img_size_y() - pool->start() - pool->size_y()) / + ((float)pool->stride_y()) + + 1.5); + config.layerConfig.set_size(pool->output_x() * pool->output_y() * + pool->channels()); + + testLayerGrad(config, "pool", 100, trans, useGpu); +} +#endif + +TEST(Layer, PoolLayer) { + testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ false); + testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ false); + +#ifndef PADDLE_ONLY_CPU + testPoolLayer("avg-projection", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("max-projection", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer2("cudnn-max-pool", /* trans= */ false, /* useGpu= */ true); + testPoolLayer2("cudnn-avg-pool", /* trans= */ false, /* useGpu= */ true); +#endif +} + +void testSppLayer(const string& poolType, + const int pyramidHeight, + bool trans, + bool useGpu) { + TestConfig config; + config.layerConfig.set_type("spp"); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 3200, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + SppConfig* sppConfig = input->mutable_spp_conf(); + sppConfig->set_pool_type(poolType); + sppConfig->set_pyramid_height(pyramidHeight); + ImageConfig* imageConfig = sppConfig->mutable_image_conf(); + imageConfig->set_channels(16); + imageConfig->set_img_size(10); + imageConfig->set_img_size_y(20); + int outputSize = (std::pow(4, sppConfig->pyramid_height()) - 1) / (4 - 1); + config.layerConfig.set_size(outputSize * imageConfig->channels()); + 
testLayerGrad(config, "spp", 100, trans, useGpu); +} + +TEST(Layer, SpatialPyramidPoolLayer) { + for (auto useGpu : {false, true}) { + for (auto pyramidHeight : {1, 2, 3}) { + testSppLayer("avg-projection", pyramidHeight, false, useGpu); + testSppLayer("max-projection", pyramidHeight, false, useGpu); + } + } +} + +TEST(Layer, rankCostLayer) { + TestConfig config; + config.layerConfig.set_type("rank-cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "rank-cost", 100, false, useGpu); + } +} + +TEST(Layer, sumCostLayer) { + TestConfig config; + config.layerConfig.set_type("sum_cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "sum_cost", 100, false, useGpu); + } +} + +TEST(Layer, weightedRankCostLayer) { + TestConfig config; + config.layerConfig.set_type("rank-cost"); + config.biasSize = 0; + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 1, 0}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_2", 1, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_3", 1, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "weighted-rank-cost", 100, false, useGpu); + } +} + +TEST(Layer, TensorLayer) { + TestConfig config; + config.layerConfig.set_type("tensor"); + config.layerConfig.set_size(10); + config.layerConfig.set_active_type("sigmoid"); + config.biasSize = config.layerConfig.size(); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 5, 250}); + config.inputDefs.push_back({INPUT_DATA, "layer_1", 5, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "tensor", 100, false, useGpu); + } +} + +TEST(Layer, RecurrentLayer) { + TestConfig config; + config.layerConfig.set_type("recurrent"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("tanh"); + config.biasSize = 4; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 4, /* paraSize= */ 16}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto reversed : {false, true}) { + config.layerConfig.set_reversed(reversed); + config.testState = !reversed; + testLayerGrad(config, "recurrent", 50, /* trans= */ false, useGpu); + } + } +} + +TEST(Layer, LstmLayer) { + TestConfig config; + config.layerConfig.set_type("lstmemory"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("tanh"); + config.layerConfig.set_active_state_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 28; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 64}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + for (auto reversed : {false, true}) { + config.layerConfig.set_reversed(reversed); + config.testState = !reversed; + testLayerGrad(config, "lstmemory", 100, /* trans= */ 
false, useGpu); + } + } + for (auto useGpu : {true}) { + config.testBatchState = true; + config.layerConfig.set_reversed(false); + testLayerGrad(config, "lstmemory", 10, /* trans= */ false, useGpu); + } +} + +TEST(Layer, MDLstmLayer) { + TestConfig config; + config.layerConfig.set_type("mdlstmemory"); + config.layerConfig.set_size(4); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_active_state_type("sigmoid"); + config.layerConfig.set_active_gate_type("sigmoid"); + config.biasSize = 4 * 9; + + config.inputDefs.push_back( + {INPUT_SEQUENCE_MDIM_DATA, "layer_0", 4 * 5, 4 * 4 * 5}); + config.layerConfig.add_inputs(); + config.layerConfig.add_directions(true); + config.layerConfig.add_directions(true); + + for (auto useGpu : {false, true}) { + for (int i = 0; i < 2; i++) { + for (int j = 0; j < 2; j++) { + config.layerConfig.set_directions(0, bool(i)); + config.layerConfig.set_directions(1, bool(j)); + testLayerGrad(config, "mdlstmemory", 100, false, useGpu); + } + } + } +} + +TEST(Layer, ParameterReluLayer) { + auto testParameterReluLayer = [&](size_t inputSize, size_t channels) { + TestConfig config; + config.layerConfig.set_type("prelu"); + config.inputDefs.push_back({INPUT_DATA, "layer_0", inputSize, channels}); + config.layerConfig.add_inputs(); + config.layerConfig.set_size(inputSize); + config.layerConfig.set_partial_sum(inputSize / + channels); // size of feature map + for (auto useGpu : {false, true}) { + testLayerGrad(config, "prelu", 100, false, useGpu); + } + }; + + testParameterReluLayer(192, 1); + testParameterReluLayer(192, 3); + testParameterReluLayer(192, 192); +} + +TEST(Layer, ResizeLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("resize"); + config.layerConfig.set_size(64); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 16, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "resize", 100, false, useGpu); + } +} + +TEST(Layer, RotateLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("rotate"); + const int CHANNEL = 2; + const int HEIGHT = 8; + const int WIDTH = 4; + const int INPUT_SIZE = HEIGHT * WIDTH * CHANNEL; + config.layerConfig.set_size(INPUT_SIZE); + config.layerConfig.set_height(HEIGHT); + config.layerConfig.set_width(WIDTH); + config.inputDefs.push_back({INPUT_DATA, "layer_0", INPUT_SIZE, 0}); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "rotate", 100, false, useGpu); + } +} + +TEST(Layer, NCELayer) { + TestConfig config; + size_t numClasses = 4; + config.layerConfig.set_type("nce"); + config.layerConfig.set_size(1); + config.layerConfig.set_active_type("sigmoid"); + config.layerConfig.set_num_classes(numClasses); + config.biasSize = numClasses; + + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 16 * numClasses}); + config.inputDefs.push_back( + {INPUT_LABEL, "label", /* dim= */ numClasses, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto withWeight : {false, true}) { + if (withWeight) { + config.inputDefs.push_back( + {INPUT_DATA_TARGET, "weight", /* dim= */ 1, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + } + + for (auto isIdLabel : {false, true}) { + config.inputDefs[1] = { + isIdLabel ? 
INPUT_LABEL : INPUT_SPARSE_NON_VALUE_DATA,
+          "label",
+          /* dim= */ numClasses,
+          /* paraSize= */ 0};
+
+      for (auto withDist : {false, true}) {
+        config.layerConfig.clear_neg_sampling_dist();
+        if (withDist) {
+          double sum = 0;
+          for (size_t i = 0; i < numClasses; ++i) {
+            real p = rand();  // NOLINT use rand_r
+            config.layerConfig.add_neg_sampling_dist(p);
+            sum += p;
+          }
+          for (size_t i = 0; i < numClasses; ++i) {
+            real p = config.layerConfig.neg_sampling_dist(i) / sum;
+            config.layerConfig.set_neg_sampling_dist(i, p);
+          }
+        }
+        LOG(INFO) << "NCELayer "
+                  << " isIdLabel=" << isIdLabel << " withWeight=" << withWeight
+                  << " withDist=" << withDist;
+        // Not supported on GPU yet
+        testLayerGrad(config,
+                      "nce",
+                      100,
+                      /* trans= */ false,
+                      /* useGpu */ false);
+      }
+    }
+  }
+}
+
+TEST(Layer, GatedRecurrentLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("gated_recurrent");
+  config.layerConfig.set_size(4);
+  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_gate_type("sigmoid");
+  config.biasSize = 12;
+
+  config.inputDefs.push_back(
+      {INPUT_SEQUENCE_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    for (auto reversed : {false, true}) {
+      config.layerConfig.set_reversed(reversed);
+      config.testState = !reversed;
+      testLayerGrad(config, "gated_recurrent", 100, /* trans= */ false, useGpu);
+    }
+  }
+}
+
+TEST(Layer, GruStepLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("gru_step");
+  config.layerConfig.set_size(4);
+  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_gate_type("sigmoid");
+  config.biasSize = 12;
+
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_0", /* dim= */ 12, /* paraSize= */ 48});
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "gruStep", 100, /* trans= */ false, useGpu);
+  }
+}
+
+TEST(Layer, LstmStepLayer) {
+  TestConfig config;
+  config.layerConfig.set_type("lstm_step");
+  config.layerConfig.set_size(4);
+  config.layerConfig.set_active_type("sigmoid");
+  config.layerConfig.set_active_state_type("sigmoid");
+  config.layerConfig.set_active_gate_type("sigmoid");
+  config.biasSize = 12;
+  config.testAccumulate = false;
+
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_0", /* dim= */ 16, /* paraSize= */ 0});
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_1", /* dim= */ 4, /* paraSize= */ 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "lstmStep", 100, /* trans= */ false, useGpu);
+  }
+}
+
+void testBatchNormLayer(const string& type, bool trans, bool useGpu) {
+  TestConfig config;
+  const int CHANNELS = 10;
+  const int IMG_SIZE = 16;
+  const int IMG_SIZE_Y = 8;
+  size_t size = CHANNELS * IMG_SIZE * IMG_SIZE_Y;
+  config.layerConfig.set_type(type);
+  config.layerConfig.set_size(size);
+  config.layerConfig.set_active_type("sigmoid");
+  config.biasSize = CHANNELS;
+  config.inputDefs.push_back({INPUT_DATA,
+                              "layer_0",
+                              /* dim= */ size,
+                              /* paraSize= */ CHANNELS});
+
+  config.inputDefs.push_back({INPUT_DATA, "layer_1_running_mean", 1, CHANNELS});
+  config.inputDefs.back().isStatic = true;
+  config.inputDefs.push_back({INPUT_DATA, "layer_2_running_var", 1, CHANNELS});
+  config.inputDefs.back().isStatic = true;
+
+  LayerInputConfig* input =
config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + ImageConfig* img_conf = input->mutable_image_conf(); + img_conf->set_channels(CHANNELS); + img_conf->set_img_size(IMG_SIZE); + img_conf->set_img_size_y(IMG_SIZE_Y); + + testLayerGrad(config, + "batch_norm", + 64, + /* trans= */ trans, + useGpu, + /* useWeight */ true); +} + +TEST(Layer, BatchNormalizationLayer) { + testBatchNormLayer("batch_norm", false, false); +#ifndef PADDLE_ONLY_CPU + testBatchNormLayer("batch_norm", false, true); + if (hl_get_cudnn_lib_version() >= int(4000)) { + testBatchNormLayer("cudnn_batch_norm", false, true); + } +#endif +} + +void testConvOperator(bool isDeconv) { + TestConfig config; + const int NUM_FILTERS = 16; + const int FILTER_SIZE = 2; + const int FILTER_SIZE_Y = 3; + const int CHANNELS = 3; + const int IMAGE_SIZE = 16; + const int IMAGE_SIZE_Y = 9; + OperatorConfig& operatorConf = *config.layerConfig.add_operator_confs(); + if (isDeconv) { + operatorConf.set_type("convt"); + } else { + operatorConf.set_type("conv"); + } + ConvConfig* conv = operatorConf.mutable_conv_conf(); + operatorConf.set_num_filters(NUM_FILTERS); + conv->set_filter_size(FILTER_SIZE); + conv->set_filter_size_y(FILTER_SIZE_Y); + conv->set_channels(CHANNELS); + conv->set_padding(0); + conv->set_padding_y(1); + conv->set_stride(2); + conv->set_stride_y(2); + conv->set_groups(1); + conv->set_img_size(IMAGE_SIZE); + conv->set_img_size_y(IMAGE_SIZE_Y); + conv->set_output_x(outputSize(conv->img_size(), + conv->filter_size(), + conv->padding(), + conv->stride(), + /* caffeMode */ true)); + conv->set_output_y(outputSize(conv->img_size_y(), + conv->filter_size_y(), + conv->padding_y(), + conv->stride_y(), + /* caffeMode */ true)); + + if (isDeconv) { + conv->set_filter_channels(NUM_FILTERS / conv->groups()); + config.inputDefs.push_back({INPUT_DATA, + "layer_0", + conv->output_x() * conv->output_y() * CHANNELS, + 0}); + config.layerConfig.set_size(IMAGE_SIZE * IMAGE_SIZE_Y * NUM_FILTERS); + } else { + conv->set_filter_channels(conv->channels() / conv->groups()); + config.inputDefs.push_back( + {INPUT_DATA, "layer_0", IMAGE_SIZE * IMAGE_SIZE_Y * CHANNELS, 0}); + config.layerConfig.set_size(conv->output_x() * conv->output_y() * + NUM_FILTERS); + } + + config.inputDefs.push_back( + {INPUT_DATA, + "layer_1", + FILTER_SIZE * FILTER_SIZE_Y * CHANNELS * NUM_FILTERS, + 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + testOperatorGrad(config, operatorConf, 100, /*useGpu*/ true, false); +} + +TEST(Operator, conv) { + testConvOperator(/*isDeconv*/ true); + testConvOperator(/*isDeconv*/ false); +} + +TEST(Layer, FeatureMapExpandLayer) { + TestConfig config; + config.layerConfig.set_type("featmap_expand"); + const int CHANNELS = 10; + const int INPUT_SIZE = 100; + config.layerConfig.set_size(INPUT_SIZE * CHANNELS); + config.layerConfig.set_num_filters(CHANNELS); + config.inputDefs.push_back({INPUT_SEQUENCE_DATA, + "layer_0", + /* dim= */ INPUT_SIZE, + /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + for (auto useGpu : {false, true}) { + for (auto asRowVec : {false, true}) { + config.layerConfig.set_user_arg(asRowVec ? 
"as_row_vec" : "as_col_vec"); + testLayerGrad(config, + "featmap_expand", + /*batch_size*/ 100, + /* trans= */ false, + useGpu, + /* useWeight */ true); + } + } +} + +TEST(Layer, MultiplexLayer) { + TestConfig config; + const int LAYER_SIZE = 100; + config.layerConfig.set_type("multiplex"); + config.layerConfig.set_size(LAYER_SIZE); + + config.inputDefs.push_back({INPUT_LABEL, "layer_0", 2, 0}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_1", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); + config.inputDefs.push_back( + {INPUT_DATA, "layer_2", /* dim= */ LAYER_SIZE, /* paraSize= */ 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "multiplex", 512, /* trans= */ false, useGpu); + } +} + +TEST(Layer, PadLayer) { + TestConfig config; + config.biasSize = 0; + config.layerConfig.set_type("pad"); + + int c = 4; + int h = 31; + int w = 36; + size_t size = c * h * w; + config.inputDefs.push_back({INPUT_DATA, "layer_0", size, 0}); + LayerInputConfig* input = config.layerConfig.add_inputs(); + PadConfig* pad = input->mutable_pad_conf(); + ImageConfig* image = pad->mutable_image_conf(); + + image->set_channels(c); + image->set_img_size(h); + image->set_img_size_y(w); + pad->add_pad_c(1); + pad->add_pad_c(2); + pad->add_pad_h(2); + pad->add_pad_h(3); + pad->add_pad_w(3); + pad->add_pad_w(5); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "pad", 10, false, useGpu); + } +} + +TEST(Layer, CrossChannelNormLayer) { + TestConfig config; + config.paramInitialMean = 1.; + config.paramInitialStd = 0.; + config.layerConfig.set_type("norm"); + config.layerConfig.set_size(100); + LayerInputConfig* input = config.layerConfig.add_inputs(); + NormConfig* norm = input->mutable_norm_conf(); + norm->set_norm_type("cross-channel-norm"); + norm->set_channels(10); + norm->set_size(100); + norm->set_scale(0); + norm->set_pow(0); + norm->set_blocked(0); + config.inputDefs.push_back({INPUT_DATA, "layer_0", 100, 10}); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "cross-channel-norm", 10, false, useGpu, false); + } +} + +TEST(Layer, smooth_l1) { + TestConfig config; + config.layerConfig.set_type("smooth_l1"); + + config.inputDefs.push_back({INPUT_DATA, "layer_0", 200, 0}); + config.inputDefs.push_back({INPUT_DATA_TARGET, "layer_1", 200, 0}); + config.layerConfig.add_inputs(); + config.layerConfig.add_inputs(); + + for (auto useGpu : {false, true}) { + testLayerGrad(config, "smooth_l1", 100, false, useGpu, false); + } +} + +TEST(Layer, multibox_loss) { + TestConfig config; + config.layerConfig.set_type("multibox_loss"); + config.biasSize = 0; + LayerInputConfig* input = config.layerConfig.add_inputs(); + MultiBoxLossConfig* multiboxLoss = input->mutable_multibox_loss_conf(); + multiboxLoss->set_num_classes(21); + multiboxLoss->set_input_num(1); + multiboxLoss->set_overlap_threshold(0.5); + multiboxLoss->set_neg_pos_ratio(3); + multiboxLoss->set_neg_overlap(0.5); + multiboxLoss->set_background_id(0); + multiboxLoss->set_height(3); + multiboxLoss->set_width(3); + + size_t gtNum = 1; + MatrixPtr labelValue = Matrix::create(gtNum, 6, false, false); + labelValue->randomizeUniform(); + labelValue->add(-0.5); + labelValue->sigmoid(*labelValue); + real* labelData = labelValue->getData(); + size_t labelWidth = labelValue->getWidth(); + for (size_t i = 0; i < gtNum; ++i) { + *(labelData + i * labelWidth) = std::rand() % 20 + 1; + *(labelData + i * labelWidth + 1) = 0.400259; 
+    *(labelData + i * labelWidth + 2) = 0.377857;
+    *(labelData + i * labelWidth + 3) = 0.525712;
+    *(labelData + i * labelWidth + 4) = 0.519368;
+  }
+  vector<int> seqStartPositions(gtNum + 1, 0);
+  for (size_t i = 1; i <= gtNum; ++i) {
+    seqStartPositions[i] = i;
+  }
+
+  // Ensure at least one matched bbox
+  MatrixPtr priorValue = Matrix::create(1, 72, false, false);
+  priorValue->randomizeUniform();
+  priorValue->add(-0.5);
+  priorValue->sigmoid(*priorValue);
+  real* priorData = priorValue->getData();
+  *(priorData) = 0.424811;
+  *(priorData + 1) = 0.397059;
+  *(priorData + 2) = 0.538905;
+  *(priorData + 3) = 0.447091;
+  *(priorData + 4) = 0.425720;
+  *(priorData + 5) = 0.515228;
+  *(priorData + 6) = 0.519452;
+  *(priorData + 7) = 0.591065;
+
+  config.inputDefs.push_back(
+      {INPUT_SELF_DEFINE_DATA, "priorbox", priorValue, {}});
+  config.inputDefs.push_back(
+      {INPUT_SELF_DEFINE_DATA, "label", labelValue, seqStartPositions});
+  config.inputDefs.push_back({INPUT_DATA, "locPred", 36, 0});
+  config.inputDefs.push_back({INPUT_DATA, "confPred", 189, 0});
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "multibox_loss", 1, false, useGpu, false);
+  }
+}
+
+TEST(Layer, TransLayer) {
+  TestConfig config;
+  const int height = 128;
+  const int width = 1028;
+  config.layerConfig.set_type("trans");
+  config.layerConfig.set_size(width);
+
+  config.inputDefs.push_back(
+      {INPUT_DATA, "layer_0", /* dim= */ height * width, /* paraSize= */ 0});
+  config.layerConfig.add_inputs();
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "trans", height, /* trans= */ false, useGpu);
+  }
+}
+
+TEST(Layer, RowConvLayer) {
+  const int context = 3;
+  const int size = 512;
+
+  TestConfig config;
+  config.layerConfig.set_type("row_conv");
+  config.layerConfig.set_size(size);
+  config.layerConfig.set_active_type("sigmoid");
+
+  config.inputDefs.push_back(
+      {INPUT_SEQUENCE_DATA, "layer_0", size, context * size});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  RowConvConfig* conv = input->mutable_row_conv_conf();
+  conv->set_context_length(context);
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "row_conv", 100, false, useGpu, false);
+  }
+}
+
+TEST(Layer, CropLayer) {
+  TestConfig config;
+  // config input_0
+  config.inputDefs.push_back({INPUT_DATA, "layer_0", 1024, 0});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  ImageConfig* img = input->mutable_image_conf();
+  img->set_channels(4);
+  img->set_img_size(16);
+  config.layerConfig.set_axis(2);
+  config.layerConfig.add_offset(0);
+  config.layerConfig.add_offset(0);
+
+  // config input_1
+  config.inputDefs.push_back({INPUT_DATA, "layer_1", 128, 0});
+  input = config.layerConfig.add_inputs();
+  img = input->mutable_image_conf();
+  img->set_channels(2);
+  img->set_img_size(8);
+
+  // config crop layer
+  config.layerConfig.set_type("crop");
+  config.layerConfig.set_name("cropLayer");
+
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "crop", 100, false, useGpu, false);
+  }
+}
 
 vector<int> randSampling(real range, int n) {
   CHECK_GE(range, n);
@@ -1929,18 +1914,20 @@ vector<int> randSampling(real range, int n) {
 TEST(Layer, SubNestedSequenceLayer) {
   // layer size is not crucial for this layer,
   // so use a small layer size in the unit test
-  const int layerSize = 8;
-  const int maxSeqNum = 5;
-  const int maxSeqLen = 5;
-  const int beamSize = 3;
+  const int layerSize = 4;
+
+  const int maxSeqNum = 50;
+  const int maxSeqLen = 50;
+  const int maxBeamSize = 32;
+
+  srand((size_t)(time(NULL)));
+  int beamSize = 1 + (rand() % maxBeamSize);
 
   TestConfig config;
   config.layerConfig.set_type("sub_nested_seq");
   config.layerConfig.set_name("sub_nested_seq_layer");
   config.layerConfig.set_size(layerSize);
 
-  // srand((size_t)(time(NULL)));
-  srand(1);
   int seqNum = 1 + (rand() % maxSeqNum);
 
   // sequence information for the first input; it is a nested sequence
@@ -1969,6 +1956,7 @@ TEST(Layer, SubNestedSequenceLayer) {
   MatrixPtr seqInputPtr =
       Matrix::create(seqStartPos.back(), layerSize, false, false);
+  seqInputPtr->randomizeUniform();
   config.inputDefs.push_back({INPUT_SELF_DEFINE_DATA,
                               "nested_seq_input",
                               seqInputPtr,
@@ -1989,35 +1977,35 @@ TEST(Layer, SubNestedSequenceLayer) {
   }
 }
 
-// TEST(Layer, ClipLayer) {
-//   const size_t batchSize = 128;
-//   const size_t size = 512;
-//   TestConfig config;
-//   config.layerConfig.set_type("clip");
-//   config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
-//   LayerInputConfig* input = config.layerConfig.add_inputs();
-//   ClipConfig* layerConf = input->mutable_clip_conf();
-//   double p1 = std::rand() / (double)RAND_MAX;
-//   double p2 = std::rand() / (double)RAND_MAX;
-//   layerConf->set_min(std::min(p1, p2));
-//   layerConf->set_max(std::max(p1, p2));
-//   for (auto useGpu : {false, true}) {
-//     testLayerGrad(config, "clip", batchSize, false, useGpu, false);
-//   }
-// }
-//
-// TEST(Layer, RowL2NormLayer) {
-//   const size_t batchSize = 128;
-//   const size_t size = 512;
-//   TestConfig config;
-//   config.layerConfig.set_type("row_l2_norm");
-//   config.layerConfig.set_size(size);
-//   config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
-//   config.layerConfig.add_inputs();
-//   for (auto useGpu : {false, true}) {
-//     testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false);
-//   }
-// }
+TEST(Layer, ClipLayer) {
+  const size_t batchSize = 128;
+  const size_t size = 512;
+  TestConfig config;
+  config.layerConfig.set_type("clip");
+  config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
+  LayerInputConfig* input = config.layerConfig.add_inputs();
+  ClipConfig* layerConf = input->mutable_clip_conf();
+  double p1 = std::rand() / (double)RAND_MAX;
+  double p2 = std::rand() / (double)RAND_MAX;
+  layerConf->set_min(std::min(p1, p2));
+  layerConf->set_max(std::max(p1, p2));
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "clip", batchSize, false, useGpu, false);
+  }
+}
+
+TEST(Layer, RowL2NormLayer) {
+  const size_t batchSize = 128;
+  const size_t size = 512;
+  TestConfig config;
+  config.layerConfig.set_type("row_l2_norm");
+  config.layerConfig.set_size(size);
+  config.inputDefs.push_back({INPUT_DATA, "input", size, 0});
+  config.layerConfig.add_inputs();
+  for (auto useGpu : {false, true}) {
+    testLayerGrad(config, "row_l2_norm", batchSize, false, useGpu, false);
+  }
+}
 
 int main(int argc, char** argv) {
   testing::InitGoogleTest(&argc, argv);
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index ebbe95a0c7..2bed2b5f45 100755
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -6097,9 +6097,11 @@ def sub_nested_seq_layer(input, selected_indices, name=None):
     The sub_nested_seq_layer accepts two inputs: the first one is a nested
     sequence; the second one is a set of selected indices in the nested
+    sequence.
+    Then sub_nest_seq_layer trims the first nested sequence input according to
+    the selected indices to form a new output.
+
+    This layer is useful in beam training.
 
-    Then sub_nest_seq_layer selects trims the first input according to the
-    selected indices to give a new output. This layer is used in beam training.
 
     The example usage is:
--
GitLab
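
Editor's note: the hunk above is truncated before the docstring's own usage
snippet. As a rough sketch only, the layer could be wired up as follows;
everything here except the sub_nested_seq_layer signature (input,
selected_indices, name) shown in the hunk header is a made-up placeholder,
not taken from the patch:

    # Hypothetical sketch. The first input must carry a nested (two-level)
    # sequence; the second holds, per outer sequence, the indices of the
    # sub-sequences to keep.
    from paddle.trainer_config_helpers import *

    nested_seq = data_layer(name='nested_seq_input', size=128)
    selected_ids = data_layer(name='selected_indices', size=3)

    # Trim the nested sequence down to the selected sub-sequences.
    sub_seq = sub_nested_seq_layer(input=nested_seq,
                                   selected_indices=selected_ids,
                                   name='sub_nested_seq_layer')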