diff --git a/CMakeLists.txt b/CMakeLists.txt index 9ad69738eb2ac21d6ff2624f11d17a38410d5c1f..26d94384a9150735aa8341fd8a18cb039895ff91 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -71,7 +71,8 @@ option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plan option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE}) option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF) option(ON_INFER "Turn on inference optimization." OFF) -option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF) +option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF) +option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface" OFF) option(WITH_SYSTEM_BLAS "Use system blas library" OFF) option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION}) option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 69da9b98198de358348621ecdb444f2f81c7757f..09eb437aede4364f8aa285d5296f21cd8460fca1 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -221,6 +221,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} -DCMAKE_INSTALL_LIBDIR=lib + -DBUILD_SHARED_LIBS=OFF CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR} -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index c0df4c2dc6cd46a90d4205b26123fb6c44fcaf88..3f576a45169c9c7e4581d304efc7cf0bca1b310a 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -118,6 +118,8 @@ paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4')) paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c')) paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca')) +paddle.fluid.layers.reduce_all (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '646ca4d4a2cc16084f59de44b6927eca')) +paddle.fluid.layers.reduce_any (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'f36661060aeeaf6c6b1331e41b3726fa')) paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3')) paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce')) paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6')) @@ -125,7 +127,7 @@ paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6')) paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2')) paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571')) -paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990')) +paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '35c6a241bcc1a1fc89508860d82ad62b')) paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9')) paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32')) paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab')) @@ -204,6 +206,7 @@ paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'sha paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860')) paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a')) paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8')) +paddle.fluid.layers.rank (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'ee1386c42ecc8f424fe3fb21862fefc2')) paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42')) paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012')) paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25')) @@ -236,7 +239,7 @@ paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], vararg paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec')) paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607')) paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329')) -paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', 'ad669cdf83e72a69ebc5ed79e36486de')) +paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '731b21c62a4add60a33bd76d802ffc5c')) paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393')) paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139')) paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc')) @@ -272,6 +275,7 @@ paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, de paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8')) paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292')) paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb')) +paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '495e21e9a848c2d075a102802fc67756')) paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) @@ -361,8 +365,7 @@ paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_st paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40')) paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae')) paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28')) -paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8')) -paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b')) +paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', 'f8b2727bccf0f368c997d7cf05847e49')) paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565')) paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754')) diff --git a/paddle/fluid/framework/data_feed.cc b/paddle/fluid/framework/data_feed.cc index b5f7e6c22405d6928f0e423458d6cd720f2d09a8..365c80da34eb287f50d2f0dcbf3844001ab43ec8 100644 --- a/paddle/fluid/framework/data_feed.cc +++ b/paddle/fluid/framework/data_feed.cc @@ -72,7 +72,6 @@ bool DataFeed::PickOneFile(std::string* filename) { } VLOG(3) << "file_idx_=" << *file_idx_; *filename = filelist_[(*file_idx_)++]; - // LOG(ERROR) << "pick file:" << *filename; return true; } @@ -466,6 +465,17 @@ void MultiSlotDataFeed::Init( if (slot.is_used()) { use_slots_.push_back(all_slots_[i]); use_slots_is_dense_.push_back(slot.is_dense()); + std::vector local_shape; + if (slot.is_dense()) { + // for batch size holder if is_dense + if (slot.shape(0) > 0) { + local_shape.push_back(0); + } + } + for (size_t i = 0; i < slot.shape_size(); ++i) { + local_shape.push_back(slot.shape(i)); + } + use_slots_shape_.push_back(local_shape); } } feed_vec_.resize(use_slots_.size()); @@ -752,8 +762,8 @@ void MultiSlotDataFeed::PutToFeedVec( LoD data_lod{offset}; feed_vec_[i]->set_lod(data_lod); if (use_slots_is_dense_[i]) { - int dim = total_instance / batch_size_; - feed_vec_[i]->Resize({batch_size_, dim}); + use_slots_shape_[i][0] = batch_size_; + feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i])); } } #endif @@ -785,6 +795,16 @@ void MultiSlotInMemoryDataFeed::Init( if (slot.is_used()) { use_slots_.push_back(all_slots_[i]); use_slots_is_dense_.push_back(slot.is_dense()); + std::vector local_shape; + if (slot.is_dense()) { + if (slot.shape(0) > 0) { + local_shape.push_back(0); + } + } + for (size_t i = 0; i < slot.shape_size(); ++i) { + local_shape.push_back(slot.shape(i)); + } + use_slots_shape_.push_back(local_shape); } } feed_vec_.resize(use_slots_.size()); @@ -940,8 +960,8 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec( LoD data_lod{offset}; feed_vec_[i]->set_lod(data_lod); if (use_slots_is_dense_[i]) { - int dim = total_instance / batch_size_; - feed_vec_[i]->Resize({batch_size_, dim}); + use_slots_shape_[i][0] = batch_size_; + feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i])); } } #endif diff --git a/paddle/fluid/framework/data_feed.h b/paddle/fluid/framework/data_feed.h index 648c874a0b8763b18118e18adf3b3e93acfd104b..d098c7858a98c644bd3cad78d3cf1e3b35ca026b 100644 --- a/paddle/fluid/framework/data_feed.h +++ b/paddle/fluid/framework/data_feed.h @@ -142,6 +142,7 @@ class DataFeed { // object) std::vector all_slots_; std::vector all_slots_type_; + std::vector> use_slots_shape_; std::vector use_slots_index_; // -1: not used; >=0: the index of use_slots_ diff --git a/paddle/fluid/framework/data_feed.proto b/paddle/fluid/framework/data_feed.proto index 77911306299b77748a2ad9437d49680748885003..03996e0e20a1729ee300a5ad37abc325876930b7 100644 --- a/paddle/fluid/framework/data_feed.proto +++ b/paddle/fluid/framework/data_feed.proto @@ -19,6 +19,7 @@ message Slot { required string type = 2; optional bool is_dense = 3 [ default = false ]; optional bool is_used = 4 [ default = false ]; + repeated int32 shape = 5; // we can define N-D Tensor } message MultiSlotDesc { repeated Slot slots = 1; } diff --git a/paddle/fluid/framework/details/build_strategy.cc b/paddle/fluid/framework/details/build_strategy.cc index afe5078bf80d00b595789a5f45d91a5e7a8dfce6..20cfa75292cf52a01bf794a2714deaac1e821f50 100644 --- a/paddle/fluid/framework/details/build_strategy.cc +++ b/paddle/fluid/framework/details/build_strategy.cc @@ -150,6 +150,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder { AppendPass("runtime_context_cache_pass"); } + if (strategy_.cache_expected_kernel_) { + VLOG(10) << "Add expected_kernel_cache_pass"; + AppendPass("expected_kernel_cache_pass"); + } + AppendMultiDevPass(strategy_); if (strategy_.fuse_all_reduce_ops_) { @@ -337,3 +342,4 @@ USE_PASS(fuse_adam_op_pass); USE_PASS(fuse_sgd_op_pass); USE_PASS(fuse_all_reduce_op_pass); USE_PASS(runtime_context_cache_pass); +USE_PASS(expected_kernel_cache_pass); diff --git a/paddle/fluid/framework/details/build_strategy.h b/paddle/fluid/framework/details/build_strategy.h index 8aa444a30c0f7f1f5c19d54cf248f86c3e3b3cf3..b1601cfbcd5e9c66f1bbecd1f6fe10bc279cea26 100644 --- a/paddle/fluid/framework/details/build_strategy.h +++ b/paddle/fluid/framework/details/build_strategy.h @@ -83,11 +83,11 @@ struct BuildStrategy { bool sync_batch_norm_{false}; - bool memory_optimize_{true}; - // TODO(dzhwinter): - // make enable_inplace, memory_optimize_ - // memory_early_delete_ true by default - bool enable_inplace_{true}; + // FIXME(liuwei1031) disable memory_optimzie and enable_inplace in 1.4 + // to open them by default, we need to solve the fetch variable issue + bool memory_optimize_{false}; + + bool enable_inplace_{false}; bool enable_sequential_execution_{false}; @@ -108,6 +108,7 @@ struct BuildStrategy { bool remove_unnecessary_lock_{true}; bool cache_runtime_context_{false}; + bool cache_expected_kernel_{true}; // NOTE: // Before you add new options, think if it's a general strategy that works diff --git a/paddle/fluid/framework/downpour_worker.cc b/paddle/fluid/framework/downpour_worker.cc index 4ca7842fa261a1b8178438d35ca5d626146663d4..386ffd84c57063e950cd8b0d57304c66190be4c4 100644 --- a/paddle/fluid/framework/downpour_worker.cc +++ b/paddle/fluid/framework/downpour_worker.cc @@ -21,40 +21,40 @@ namespace framework { void DownpourWorker::Initialize(const TrainerDesc& desc) { param_ = desc.downpour_param(); - for (size_t i = 0; i < param_.sparse_table_size(); ++i) { + for (int i = 0; i < param_.sparse_table_size(); ++i) { uint64_t table_id = static_cast(param_.sparse_table(i).table_id()); TableParameter table = param_.sparse_table(i); sparse_key_names_[table_id].resize(table.sparse_key_name_size()); - for (size_t j = 0; j < table.sparse_key_name_size(); ++j) { + for (int j = 0; j < table.sparse_key_name_size(); ++j) { sparse_key_names_[table_id][j] = table.sparse_key_name(j); } sparse_value_names_[table_id].resize(table.sparse_value_name_size()); - for (size_t j = 0; j < table.sparse_value_name_size(); ++j) { + for (int j = 0; j < table.sparse_value_name_size(); ++j) { sparse_value_names_[table_id][j] = table.sparse_value_name(j); } sparse_grad_names_[table_id].resize(table.sparse_grad_name_size()); - for (size_t j = 0; j < table.sparse_grad_name_size(); ++j) { + for (int j = 0; j < table.sparse_grad_name_size(); ++j) { sparse_grad_names_[table_id][j] = table.sparse_grad_name(j); } label_var_name_[table_id] = table.label_var_name(); } - for (size_t i = 0; i < param_.dense_table_size(); ++i) { + for (int i = 0; i < param_.dense_table_size(); ++i) { uint64_t table_id = static_cast(param_.dense_table(i).table_id()); auto table = param_.dense_table(i); dense_value_names_[table_id].resize(table.dense_value_name_size()); - for (size_t j = 0; j < table.dense_value_name_size(); ++j) { + for (int j = 0; j < table.dense_value_name_size(); ++j) { dense_value_names_[table_id][j] = table.dense_value_name(j); } dense_grad_names_[table_id].resize(table.dense_grad_name_size()); - for (size_t j = 0; j < table.dense_grad_name_size(); ++j) { + for (int j = 0; j < table.dense_grad_name_size(); ++j) { dense_grad_names_[table_id][j] = table.dense_grad_name(j); } } skip_ops_.resize(param_.skip_ops_size()); - for (size_t i = 0; i < param_.skip_ops_size(); ++i) { + for (int i = 0; i < param_.skip_ops_size(); ++i) { skip_ops_[i] = param_.skip_ops(i); } @@ -83,14 +83,14 @@ void DownpourWorker::CollectLabelInfo(size_t table_idx) { LoDTensor* tensor = var->GetMutable(); int64_t* label_ptr = tensor->data(); - int global_index = 0; + size_t global_index = 0; for (size_t i = 0; i < sparse_key_names_[table_id].size(); ++i) { VLOG(3) << "sparse_key_names_[" << i << "]: " << sparse_key_names_[table_id][i]; Variable* fea_var = thread_scope_->FindVar(sparse_key_names_[table_id][i]); LoDTensor* tensor = fea_var->GetMutable(); int64_t* ids = tensor->data(); - int fea_idx = 0; + size_t fea_idx = 0; // tensor->lod()[0].size() == batch_size + 1 for (auto lod_idx = 1u; lod_idx < tensor->lod()[0].size(); ++lod_idx) { for (; fea_idx < tensor->lod()[0][lod_idx]; ++fea_idx) { @@ -138,7 +138,7 @@ void DownpourWorker::FillSparseValue(size_t table_idx) { auto& tensor_lod = tensor->lod()[0]; LoD data_lod{tensor_lod}; tensor_emb->set_lod(data_lod); - for (auto index = 0u; index < len; ++index) { + for (int index = 0; index < len; ++index) { if (ids[index] == 0u) { memcpy(ptr + table.emb_dim() * index, init_value.data() + 2, sizeof(float) * table.emb_dim()); @@ -192,7 +192,7 @@ void DownpourWorker::TrainFilesWithProfiler() { read_time += timeline.ElapsedSec(); total_time += timeline.ElapsedSec(); VLOG(3) << "program config size: " << param_.program_config_size(); - for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size(); + for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size(); ++i) { uint64_t tid = static_cast( param_.program_config(0).pull_sparse_table_id(i)); @@ -244,8 +244,8 @@ void DownpourWorker::TrainFilesWithProfiler() { } if (need_to_push_sparse_) { - for (size_t i = 0; - i < param_.program_config(0).push_sparse_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_sparse_table_id(i)); TableParameter table; @@ -268,8 +268,8 @@ void DownpourWorker::TrainFilesWithProfiler() { if (need_to_push_dense_) { timeline.Start(); - for (size_t i = 0; - i < param_.program_config(0).push_dense_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_dense_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_dense_table_id(i)); fleet_ptr_->PushDenseVarsAsync( @@ -315,8 +315,8 @@ void DownpourWorker::TrainFilesWithProfiler() { } if (need_to_push_dense_) { - for (size_t i = 0; - i < param_.program_config(0).push_dense_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_dense_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_dense_table_id(i)); pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid); @@ -362,7 +362,7 @@ void DownpourWorker::TrainFiles() { int cur_batch; while ((cur_batch = device_reader_->Next()) > 0) { // pull sparse here - for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size(); + for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size(); ++i) { uint64_t tid = static_cast( param_.program_config(0).pull_sparse_table_id(i)); @@ -397,8 +397,8 @@ void DownpourWorker::TrainFiles() { if (need_to_push_sparse_) { // push gradients here - for (size_t i = 0; - i < param_.program_config(0).push_sparse_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_sparse_table_id(i)); TableParameter table; @@ -416,8 +416,8 @@ void DownpourWorker::TrainFiles() { } if (need_to_push_dense_) { - for (size_t i = 0; - i < param_.program_config(0).push_dense_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_dense_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_dense_table_id(i)); fleet_ptr_->PushDenseVarsAsync( @@ -461,8 +461,8 @@ void DownpourWorker::TrainFiles() { } if (need_to_push_dense_) { - for (size_t i = 0; - i < param_.program_config(0).push_dense_table_id_size(); ++i) { + for (int i = 0; i < param_.program_config(0).push_dense_table_id_size(); + ++i) { uint64_t tid = static_cast( param_.program_config(0).push_dense_table_id(i)); pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid); diff --git a/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc b/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc index ee67af0aff5c90a9da0ece8f197d9a0c0a8e5b9c..4a99d4c1a9c0f0bd973097d281e380341fe88515 100644 --- a/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc +++ b/paddle/fluid/framework/ir/expected_kernel_cache_pass.cc @@ -23,7 +23,7 @@ namespace ir { void ExpectedKernelCachePass::ApplyImpl(ir::Graph* graph) const { VLOG(3) << "Applies Expected Kernel Cache strategy."; for (const Node* n : graph->Nodes()) { - if (n->IsOp()) { + if (n->IsOp() && n->Op()) { n->Op()->SetAttr(kEnableCacheExpectedKernel, true); } } diff --git a/paddle/fluid/framework/op_desc.cc b/paddle/fluid/framework/op_desc.cc index e6f5b15af8cd440a9304235acfe62787c5f1b134..1ea93b7638a85e67bcc85a0c0e130d636938d6c5 100644 --- a/paddle/fluid/framework/op_desc.cc +++ b/paddle/fluid/framework/op_desc.cc @@ -241,6 +241,7 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs, outputs_ = outputs; attrs_ = attrs; need_update_ = true; + block_ = nullptr; } OpDesc::OpDesc(const OpDesc &other, BlockDesc *block) { diff --git a/paddle/fluid/framework/parallel_executor.cc b/paddle/fluid/framework/parallel_executor.cc index 4245caf1689c76d72b410c742488c55562c8b998..c4bf2b7e8c017b22f917c9f9bd40e75b8cde08b2 100644 --- a/paddle/fluid/framework/parallel_executor.cc +++ b/paddle/fluid/framework/parallel_executor.cc @@ -221,7 +221,7 @@ ParallelExecutor::ParallelExecutor(const std::vector &places, PADDLE_ENFORCE(!member_->use_cuda_, "gpu mode does not support async_mode_ now!"); graphs.push_back(graph); - for (int i = 1; i < places.size(); ++i) { + for (size_t i = 1; i < places.size(); ++i) { auto *tmp_graph = new ir::Graph(graph->OriginProgram()); async_graphs_.emplace_back(tmp_graph); graphs.push_back(tmp_graph); @@ -315,7 +315,7 @@ ParallelExecutor::ParallelExecutor(const std::vector &places, graph = build_strategy.Apply(graph, {member_->places_[0]}, loss_var_name, {member_->local_scopes_[0]}, 1, member_->use_cuda_, member_->nccl_ctxs_.get()); - for (int i = 1; i < member_->places_.size(); ++i) { + for (size_t i = 1; i < member_->places_.size(); ++i) { graphs[i] = build_strategy.Apply(graphs[i], {member_->places_[i]}, loss_var_name, {member_->local_scopes_[i]}, 1, diff --git a/paddle/fluid/framework/trainer_desc.proto b/paddle/fluid/framework/trainer_desc.proto index 389c1a870fb54ad28806ad49632323b1c93676f4..4fc05ccf5c9be37e80b4ae7263166ad76eb6d6a7 100644 --- a/paddle/fluid/framework/trainer_desc.proto +++ b/paddle/fluid/framework/trainer_desc.proto @@ -76,7 +76,7 @@ message PullDenseWorkerParameter { message TableParameter { // dense table only - optional int64 table_id = 1; + optional uint64 table_id = 1; repeated string dense_value_name = 2; repeated string dense_grad_name = 3; repeated int32 push_dense_wait_times = 5; diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6942604b0723f8665f0e8b058d48a5356a1a01f4..0155609a029664da2c3d4c90a152ec556927c32d 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -259,6 +259,9 @@ bool AnalysisPredictor::SetFeed(const std::vector &inputs, return false; } + PADDLE_ENFORCE_NOT_NULL(input_ptr); + PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data()); + if (platform::is_cpu_place(place_)) { // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. std::memcpy(static_cast(input_ptr), inputs[i].data.data(), diff --git a/paddle/fluid/inference/api/api.cc b/paddle/fluid/inference/api/api.cc index 7d57b6ec74468dbdb0519f85140629a0ac01c18d..fc2d7b48c2a1f89232dcb96d1899667230e2ddda 100644 --- a/paddle/fluid/inference/api/api.cc +++ b/paddle/fluid/inference/api/api.cc @@ -54,6 +54,7 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) { memory_owned_ = other.memory_owned_; } else { Resize(other.length()); + PADDLE_ENFORCE(!(other.length() > 0 && other.data() == nullptr)); memcpy(data_, other.data(), other.length()); length_ = other.length(); memory_owned_ = true; diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc index 54f40563c3662af24e794422be4d3262d86c76a7..56996c5cff88f5b4a9094291a09996f8b8d70a23 100644 --- a/paddle/fluid/inference/api/api_impl.cc +++ b/paddle/fluid/inference/api/api_impl.cc @@ -169,6 +169,7 @@ std::unique_ptr NativePaddlePredictor::Clone() { std::unique_ptr cls(new NativePaddlePredictor(config_)); // Hot fix the bug that result diff in multi-thread. // TODO(Superjomn) re-implement a real clone here. + PADDLE_ENFORCE_NOT_NULL(dynamic_cast(cls.get())); if (!dynamic_cast(cls.get())->Init(nullptr)) { LOG(ERROR) << "fail to call Init"; return nullptr; @@ -210,6 +211,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector &inputs, return false; } + PADDLE_ENFORCE_NOT_NULL(input_ptr); + PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data()); if (platform::is_cpu_place(place_)) { // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy. std::memcpy(static_cast(input_ptr), inputs[i].data.data(), @@ -316,6 +319,8 @@ std::unique_ptr CreatePaddlePredictor< } std::unique_ptr predictor(new NativePaddlePredictor(config)); + PADDLE_ENFORCE_NOT_NULL( + dynamic_cast(predictor.get())); if (!dynamic_cast(predictor.get())->Init(nullptr)) { return nullptr; } diff --git a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc index 9f23b9f037bcaeb758312d011067ae29c82e73cd..5ee848c3cfa2117b2adeab5e563c5d07ce1d76ca 100644 --- a/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc @@ -47,6 +47,7 @@ struct DataRecord { num_lines++; std::vector data; split(line, '\t', &data); + PADDLE_ENFORCE(data.size() >= 4); // load title1 data std::vector title1_data; split_to_int64(data[0], ' ', &title1_data); diff --git a/paddle/fluid/operators/detection/gpc.cc b/paddle/fluid/operators/detection/gpc.cc index 7c0823c0487d39eece5be08322e7d182b931ba3c..f46aaf7d0a7b2d48f18ba6cccb555bbb691ad353 100644 --- a/paddle/fluid/operators/detection/gpc.cc +++ b/paddle/fluid/operators/detection/gpc.cc @@ -24,6 +24,7 @@ **/ #include "paddle/fluid/operators/detection/gpc.h" +#include "paddle/fluid/platform/enforce.h" namespace gpc { @@ -689,6 +690,7 @@ static bbox *create_contour_bboxes(gpc_polygon *p) { gpc_malloc(box, p->num_contours * sizeof(bbox), const_cast("Bounding box creation")); + PADDLE_ENFORCE_NOT_NULL(box); /* Construct contour bounding boxes */ for (c = 0; c < p->num_contours; c++) { @@ -852,6 +854,7 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) { /* Create an extended hole array */ gpc_malloc(extended_hole, (p->num_contours + 1) * sizeof(int), const_cast("contour hole addition")); + PADDLE_ENFORCE_NOT_NULL(extended_hole); /* Create an extended contour array */ gpc_malloc(extended_contour, @@ -969,6 +972,7 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); + PADDLE_ENFORCE_NOT_NULL(sbt); build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); @@ -1604,6 +1608,7 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip, /* Build scanbeam table from scanbeam tree */ gpc_malloc(sbt, sbt_entries * sizeof(double), const_cast("sbt creation")); + PADDLE_ENFORCE_NOT_NULL(sbt); build_sbt(&scanbeam, sbt, sbtree); scanbeam = 0; free_sbtree(&sbtree); diff --git a/paddle/fluid/operators/linspace_op.cc b/paddle/fluid/operators/linspace_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f4aeb062d8dfae31a72b8ebccb3d377276662da6 --- /dev/null +++ b/paddle/fluid/operators/linspace_op.cc @@ -0,0 +1,84 @@ +/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/operators/linspace_op.h" + +namespace paddle { +namespace operators { + +class LinspaceOp : public framework::OperatorWithKernel { + public: + using framework::OperatorWithKernel::OperatorWithKernel; + + void InferShape(framework::InferShapeContext* ctx) const override { + PADDLE_ENFORCE(ctx->HasInput("Start"), + "Input(Start) of LinspaceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Stop"), + "Input(Stop) of LinspaceOp should not be null."); + PADDLE_ENFORCE(ctx->HasInput("Num"), + "Input(Num) of LinspaceOp should not be null."); + PADDLE_ENFORCE(ctx->HasOutput("Out"), + "Output(OUt) of LinspaceOp should not be null."); + + auto s_dims = ctx->GetInputDim("Start"); + PADDLE_ENFORCE((s_dims.size() == 1) && (s_dims[0] == 1), + "The shape of Input(Start) should be [1]."); + + auto e_dims = ctx->GetInputDim("Stop"); + PADDLE_ENFORCE((e_dims.size() == 1) && (e_dims[0] == 1), + "The shape of Input(Stop) should be [1]."); + + auto step_dims = ctx->GetInputDim("Num"); + PADDLE_ENFORCE((step_dims.size() == 1) && (step_dims[0] == 1), + "The shape of Input(Num) should be [1]."); + + ctx->SetOutputDim("Out", {-1}); + } + + protected: + framework::OpKernelType GetExpectedKernelType( + const framework::ExecutionContext& ctx) const override { + framework::LibraryType library_{framework::LibraryType::kPlain}; + framework::DataLayout layout_ = framework::DataLayout::kAnyLayout; + return framework::OpKernelType( + ctx.Input("Start")->type(), ctx.device_context(), + layout_, library_); + } +}; + +class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker { + public: + void Make() override { + AddInput("Start", + "First entry in the sequence. It is a tensor of shape [1], should " + "be of type float32 or float64."); + AddInput("Stop", + "Last entry in the sequence. It is a tensor of shape [1], should " + "be of type float32 or float64."); + AddInput("Num", + "Number of entry in the sequence. It is a tensor of shape [1], " + "should be of type int32."); + AddOutput("Out", "A sequence of numbers."); + AddComment(R"DOC( + Return fixed number of evenly spaced values within a given interval. First entry is start, and last entry is stop. In the case when Num is 1, only Start is returned. Like linspace function of numpy. +)DOC"); + } +}; +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_WITHOUT_GRADIENT(linspace, ops::LinspaceOp, ops::LinspaceOpMaker); +REGISTER_OP_CPU_KERNEL(linspace, ops::CPULinspaceKernel, + ops::CPULinspaceKernel); diff --git a/paddle/fluid/operators/linspace_op.cu b/paddle/fluid/operators/linspace_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..90bd17cda0e0d1f78810233537bb502f9115fbd0 --- /dev/null +++ b/paddle/fluid/operators/linspace_op.cu @@ -0,0 +1,75 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/linspace_op.h" +#include "paddle/fluid/platform/cuda_primitives.h" + +namespace paddle { +namespace operators { + +#define CUDA_1D_KERNEL_LOOP(i, n) \ + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \ + i += blockDim.x * gridDim.x) + +template +__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) { + CUDA_1D_KERNEL_LOOP(index, size) { out[index] = start + step * index; } +} + +template +__global__ void LinspaceSpecialKernel(T start, T* out) { + out[0] = start; +} + +template +class CUDALinspaceKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + auto* start_t = context.Input("Start"); + auto* stop_t = context.Input("Stop"); + auto* num_t = context.Input("Num"); + auto* out = context.Output("Out"); + + framework::Tensor n; + framework::TensorCopy(*start_t, platform::CPUPlace(), &n); + T start = n.data()[0]; + framework::TensorCopy(*stop_t, platform::CPUPlace(), &n); + T stop = n.data()[0]; + framework::TensorCopy(*num_t, platform::CPUPlace(), &n); + int32_t num = n.data()[0]; + + PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); + + out->Resize(framework::make_ddim({num})); + T* out_data = out->mutable_data(context.GetPlace()); + + T step = 0; + if (num != 1) { + step = (stop - start) / (num - 1); + } + + auto stream = context.cuda_device_context().stream(); + int block = 512; + int grid = (num + block - 1) / block; + LinspaceKernel<<>>(start, step, num, out_data); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; +REGISTER_OP_CUDA_KERNEL(linspace, ops::CUDALinspaceKernel, + ops::CUDALinspaceKernel); diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b1fcac73b0ad249aa19859bde770a8554cdb7408 --- /dev/null +++ b/paddle/fluid/operators/linspace_op.h @@ -0,0 +1,51 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once +#include +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/operators/math/math_function.h" + +namespace paddle { +namespace operators { + +template +class CPULinspaceKernel : public framework::OpKernel { + public: + void Compute(const framework::ExecutionContext& context) const override { + T start = context.Input("Start")->data()[0]; + T stop = context.Input("Stop")->data()[0]; + int32_t num = context.Input("Num")->data()[0]; + auto* out = context.Output("Out"); + PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); + + out->Resize(framework::make_ddim({num})); + + T* out_data = out->mutable_data(context.GetPlace()); + + if (num > 1) { + T step = (stop - start) / (num - 1); + T value = start; + for (int i = 0; i < num; ++i) { + out_data[i] = value; + value += step; + } + } else { + out_data[0] = start; + } + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/reduce_all_op.cc b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..b087fbbb94c7ba2f7449f6bda56010dee1c38ea6 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_all_op.cc @@ -0,0 +1,20 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h" + +REGISTER_REDUCE_OP(reduce_all); +REGISTER_OP_CPU_KERNEL(reduce_all, + ops::ReduceKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_all_op.cu b/paddle/fluid/operators/reduce_ops/reduce_all_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..bd94ba263d957d0d65506ecd802bf43add6e2fb4 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_all_op.cu @@ -0,0 +1,19 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h" + +REGISTER_OP_CUDA_KERNEL(reduce_all, + ops::ReduceKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_all_op.h b/paddle/fluid/operators/reduce_ops/reduce_all_op.h new file mode 100644 index 0000000000000000000000000000000000000000..ba159dd703c8904784546eda262bf7be77967d48 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_all_op.h @@ -0,0 +1,29 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/fluid/operators/reduce_ops/reduce_op.h" + +namespace paddle { +namespace operators { + +struct AllFunctor { + template + void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { + y->device(place) = x->all(dim); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op.cc b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..d865dcb3c935b76b8da25d723a5f780fb4de255b --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op.cc @@ -0,0 +1,20 @@ +// Copyright (c) 2018 PaddlePaddle Authors. Any Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h" + +REGISTER_REDUCE_OP(reduce_any); +REGISTER_OP_CPU_KERNEL(reduce_any, + ops::ReduceKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op.cu b/paddle/fluid/operators/reduce_ops/reduce_any_op.cu new file mode 100644 index 0000000000000000000000000000000000000000..66f0c9997ea1e27cf172a6839a68d2eb23395c4d --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op.cu @@ -0,0 +1,19 @@ +// Copyright (c) 2018 PaddlePaddle Authors. Any Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h" + +REGISTER_OP_CUDA_KERNEL(reduce_any, + ops::ReduceKernel); diff --git a/paddle/fluid/operators/reduce_ops/reduce_any_op.h b/paddle/fluid/operators/reduce_ops/reduce_any_op.h new file mode 100644 index 0000000000000000000000000000000000000000..b36bad9cada259932d2bd77c2426fbb46790de76 --- /dev/null +++ b/paddle/fluid/operators/reduce_ops/reduce_any_op.h @@ -0,0 +1,29 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/fluid/operators/reduce_ops/reduce_op.h" + +namespace paddle { +namespace operators { + +struct AnyFunctor { + template + void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) { + y->device(place) = x->any(dim); + } +}; + +} // namespace operators +} // namespace paddle diff --git a/paddle/fluid/operators/squared_l2_distance_op.h b/paddle/fluid/operators/squared_l2_distance_op.h index e0133d33e6a840d2d06832393a064df978cb9cbc..12a8f05b5a603417ead8ebd250ff7951f928f4a1 100644 --- a/paddle/fluid/operators/squared_l2_distance_op.h +++ b/paddle/fluid/operators/squared_l2_distance_op.h @@ -77,6 +77,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { auto* x_g = context.Output(framework::GradVarName("X")); auto* y_g = context.Output(framework::GradVarName("Y")); + PADDLE_ENFORCE_NOT_NULL(x_g); + PADDLE_ENFORCE_NOT_NULL(y_g); + auto sub_result = EigenMatrix::From(*in0); auto out_grad = EigenMatrix::From(*in1); @@ -92,31 +95,28 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel { // propagate back to input auto& eigen_place = *context.template device_context().eigen_device(); - if (x_g) { - x_g->mutable_data(context.GetPlace()); - // eigen matrix - auto x_grad = - EigenMatrix::From(*x_g, framework::make_ddim({x_dims[0], cols})); - // dimensions are same with subResult - x_grad.device(eigen_place) = grad_mat; - } - if (y_g) { - y_g->mutable_data(context.GetPlace()); - - PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0], - "First dimension of gradient must be greater or " - "equal than first dimension of target."); - - if (sub_result.dimensions()[0] == y_dims[0]) { - auto y_grad = - EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); - y_grad.device(eigen_place) = -1 * grad_mat; - } else { - auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({{0}}))); - auto y_grad = EigenVector::Flatten(*y_g); - y_grad.device(eigen_place) = col_sum_res; - } + x_g->mutable_data(context.GetPlace()); + // eigen matrix + auto x_grad = + EigenMatrix::From(*x_g, framework::make_ddim({x_dims[0], cols})); + // dimensions are same with subResult + x_grad.device(eigen_place) = grad_mat; + + y_g->mutable_data(context.GetPlace()); + + PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0], + "First dimension of gradient must be greater or " + "equal than first dimension of target."); + + if (sub_result.dimensions()[0] == y_dims[0]) { + auto y_grad = + EigenMatrix::From(*y_g, framework::make_ddim({y_dims[0], cols})); + y_grad.device(eigen_place) = -1 * grad_mat; + } else { + auto col_sum_res = -1 * (grad_mat.sum(Eigen::array({{0}}))); + auto y_grad = EigenVector::Flatten(*y_g); + y_grad.device(eigen_place) = col_sum_res; } } }; diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index f0ea6d9b0a751c86e3911c35d9403a32604056d7..a8a2a94d473b18fdcd78771063ef4565c7fe0e42 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1366,6 +1366,10 @@ All parameter, weight, gradient are variables in Paddle. "cache_runtime_context", [](const BuildStrategy &self) { return self.cache_runtime_context_; }, [](BuildStrategy &self, bool b) { self.cache_runtime_context_ = b; }) + .def_property( + "cache_expected_kernel", + [](const BuildStrategy &self) { return self.cache_expected_kernel_; }, + [](BuildStrategy &self, bool b) { self.cache_expected_kernel_ = b; }) .def("_finalize_strategy_and_create_passes", [](BuildStrategy &self) -> std::shared_ptr { return self.CreatePassesFromStrategy(true); diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 43db744b5edeb48f87c4413961e6ded3bd45f86d..7bb713493182239b2fd17f7b7fb496afdc9b8e6c 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -202,6 +202,7 @@ function cmake_gen() { -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DWITH_CONTRIB=${WITH_CONTRIB:-ON} -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} + -DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF} @@ -234,6 +235,7 @@ EOF -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \ -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \ -DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \ + -DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} \ -DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \ -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \ -DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}\ diff --git a/python/paddle/fluid/dataset.py b/python/paddle/fluid/dataset.py index e655fd4a976a8a6fa2811ddc43de3d1f231229d5..1a023f61675ed62c141bb6e71fabbdf0086b0c64 100644 --- a/python/paddle/fluid/dataset.py +++ b/python/paddle/fluid/dataset.py @@ -136,6 +136,7 @@ class DatasetBase(object): slot_var.name = var.name if var.lod_level == 0: slot_var.is_dense = True + slot_var.shape.extend(var.shape) if var.dtype == core.VarDesc.VarType.FP32: slot_var.type = "float" elif var.dtype == core.VarDesc.VarType.INT64: diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index e15197037e1d901855883919b02a1574b7bc9a29..fa8b49a021294e8555e979459615b1956d9b2b55 100644 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -712,10 +712,6 @@ class Executor(object): if dataset == None: raise RuntimeError("dataset is needed and should be initialized") - if not isinstance(self.place, core.CPUPlace): - raise RuntimeError("infer_from_dataset is verified on CPUPlace" - "We will open CUDAPlace in the future") - scope, trainer = self._prepare_trainer( program=program, dataset=dataset, @@ -796,10 +792,6 @@ class Executor(object): if dataset == None: raise RuntimeError("dataset is need and should be initialized") - if not isinstance(self.place, core.CPUPlace): - raise RuntimeError("train_from_dataset is verified on CPUPlace" - "We will open CUDAPlace in the future") - scope, trainer = self._prepare_trainer( program=program, dataset=dataset, diff --git a/python/paddle/fluid/layers/learning_rate_scheduler.py b/python/paddle/fluid/layers/learning_rate_scheduler.py index b7d1eeba80d93d549a019455087bb7cc1d2a1083..a67c8058f2c42713738420e81316452e15acb697 100644 --- a/python/paddle/fluid/layers/learning_rate_scheduler.py +++ b/python/paddle/fluid/layers/learning_rate_scheduler.py @@ -35,8 +35,8 @@ from ..dygraph import learning_rate_scheduler as imperate_lr __all__ = [ 'exponential_decay', 'natural_exp_decay', 'inverse_time_decay', - 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS', - 'cosine_decay', 'linear_lr_warmup' + 'polynomial_decay', 'piecewise_decay', 'noam_decay', 'cosine_decay', + 'linear_lr_warmup' ] @@ -349,24 +349,26 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): training progresses. By using this function, the learning rate will be decayed by following cosine decay strategy. - decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1) + .. math:: + + decayed\_lr = learning\_rate * 0.5 * (math.cos * (epoch * \\frac{math.pi}{epochs} ) + 1) Args: learning_rate(Variable|float): The initial learning rate. step_each_epoch(int): the number of steps in an epoch. epochs(int): the number of epochs. - Returns: - Variable: The decayed learning rate. - - Examples: + Returns: + Variable: The decayed learning rate. - ..code-block:: python + Examples: + .. code-block:: python - base_lr = 0.1 - lr = fluid.layers.cosine_decay( - learning_rate = base_lr, step_each_epoch=10000, epochs=120) + base_lr = 0.1 + lr = fluid.layers.cosine_decay( + learning_rate = base_lr, step_each_epoch=10000, epochs=120) """ + with default_main_program()._lr_schedule_guard(): if imperative_base.enabled(): decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch, @@ -381,50 +383,6 @@ def cosine_decay(learning_rate, step_each_epoch, epochs): return decayed_lr -def append_LARS(params_grads, learning_rate, weight_decay): - """ - Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for - each layer. - - Args: - learning_rate: A learning rate Variable. This - is the global learning rate for LARS. - weight_decay: A Python `float` number. - - Returns: - The decayed learning rate - Examples: - .. code-block:: python - - learning_rate *= local_gw_ratio * sqrt(sumsq(param)) - / (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param))) - """ - - assert not imperative_base.enabled( - ), "append_LARS is NOT supported in dygraph mode now" - - def _balanced_weight(param_norm, grad_norm): - if weight_decay == 1.0: - return grad_norm + param_norm - else: - return grad_norm + weight_decay * param_norm - - for param, grad in params_grads: - with param.block.program.optimized_guard( - [param, grad]), name_scope("optimizer"): - param_lr = param.optimize_attr['learning_rate'] - param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param))) - grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad))) - if type(param_lr) == float and param_lr == 1.0: - decayed_lr = learning_rate * param_norm \ - / _balanced_weight(param_norm, grad_norm) - else: - decayed_lr = learning_rate * param_lr * param_norm \ - / _balanced_weight(param_norm, grad_norm) - # set back param local learning rate - param.optimize_attr['learning_rate'] = decayed_lr - - def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): """ Applies linear learning rate warmup before the normal learning rate diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index e70ea9da38f7640fa09a39e7f8cbfeb91ddc9832..93e46eef16fb177169db679a8437d9a33ed38e99 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -73,6 +73,8 @@ __all__ = [ 'reduce_max', 'reduce_min', 'reduce_prod', + 'reduce_all', + 'reduce_any', 'sequence_first_step', 'sequence_last_step', 'sequence_slice', @@ -159,6 +161,7 @@ __all__ = [ 'sum', 'slice', 'shape', + 'rank', 'logical_and', 'logical_or', 'logical_xor', @@ -4738,6 +4741,106 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None): return out +def reduce_all(input, dim=None, keep_dim=False, name=None): + """ + Computes the ``logical and`` of tensor elements over the given dimension. + + Args: + input (Variable): The input variable which is a Tensor or LoDTensor. + dim (list|int|None): The dimension along which the logical and is computed. + If :attr:`None`, compute the logical and over all elements of + :attr:`input` and return a Tensor variable with a single element, + otherwise must be in the range :math:`[-rank(input), rank(input))`. + If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The reduced Tensor variable. + + Examples: + .. code-block:: python + + # x is a bool Tensor variable with following elements: + # [[True, False] + # [True, True]] + # Each example is followed by the correspending output tensor. + fluid.layers.reduce_all(x) # False + fluid.layers.reduce_all(x, dim=0) # [True, False] + fluid.layers.reduce_all(x, dim=-1) # [False, True] + fluid.layers.reduce_all(x, dim=1, + keep_dim=True) # [[False], [True]] + + """ + helper = LayerHelper('reduce_all', **locals()) + out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] + helper.append_op( + type='reduce_all', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': dim if dim != None else [0], + 'keep_dim': keep_dim, + 'reduce_all': True if dim == None else False + }) + return out + + +def reduce_any(input, dim=None, keep_dim=False, name=None): + """ + Computes the ``logical or`` of tensor elements over the given dimension. + + Args: + input (Variable): The input variable which is a Tensor or LoDTensor. + dim (list|int|None): The dimension along which the logical or is computed. + If :attr:`None`, compute the logical or over all elements of + :attr:`input` and return a Tensor variable with a single element, + otherwise must be in the range :math:`[-rank(input), rank(input))`. + If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`. + keep_dim (bool): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the :attr:`input` unless :attr:`keep_dim` is true. + name(str|None): A name for this layer(optional). If set None, the layer + will be named automatically. + + Returns: + Variable: The reduced Tensor variable. + + Examples: + .. code-block:: python + + # x is a bool Tensor variable with following elements: + # [[True, False] + # [False, False]] + # Each example is followed by the correspending output tensor. + fluid.layers.reduce_any(x) # True + fluid.layers.reduce_any(x, dim=0) # [True, False] + fluid.layers.reduce_any(x, dim=-1) # [True, False] + fluid.layers.reduce_any(x, dim=1, + keep_dim=True) # [[True], [False]] + + """ + helper = LayerHelper('reduce_any', **locals()) + out = helper.create_variable_for_type_inference(dtype=helper.input_dtype()) + if dim is not None and not isinstance(dim, list): + dim = [dim] + helper.append_op( + type='reduce_any', + inputs={'X': input}, + outputs={'Out': out}, + attrs={ + 'dim': dim if dim != None else [0], + 'keep_dim': keep_dim, + 'reduce_all': True if dim == None else False + }) + return out + + def split(input, num_or_sections, dim=-1, name=None): """ Split the input tensor into multiple sub-tensors. @@ -4819,7 +4922,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None): the dimension to normalization is rank(X) + axis. -1 is the last dimension. epsilon(float): The epsilon value is used to avoid division by zero, \ - the defalut value is 1e-10. + the defalut value is 1e-12. name(str|None): A name for this layer(optional). If set None, the layer \ will be named automatically. @@ -9237,6 +9340,32 @@ def shape(input): return out +def rank(input): + """ + **Rank Layer** + + Returns the number of dimensions for a tensor, which is a 0-D int32 Tensor. + + Args: + input (Variable): The input variable. + + Returns: + Variable: The rank of the input variable. + + Examples: + .. code-block:: python + + input = layers.data( + name="input", shape=[3, 100, 100], dtype="float32") + rank = layers.rank(input) # 4 + """ + + ndims = len(input.shape) + out = assign(np.array(ndims, 'int32')) + + return out + + def _elementwise_op(helper): op_type = helper.layer_type x = helper.kwargs.get('x', None) @@ -11002,7 +11131,7 @@ def pixel_shuffle(x, upscale_factor): Returns: - Out(Variable): the pixel shuffle result is a tensor variable with the same shape and the same type as the input. + Out(Variable): Reshaped tensor according to the new dimension. Raises: diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 80450119f44e93aae4b483983484ea18be5b2035..03ebd41fa00c69bfce66d325e32fc9aeb25a2486 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -24,26 +24,11 @@ from .layer_function_generator import templatedoc import numpy __all__ = [ - 'create_tensor', - 'create_parameter', - 'create_global_var', - 'cast', - 'tensor_array_to_tensor', - 'concat', - 'sums', - 'assign', - 'fill_constant_batch_size_like', - 'fill_constant', - 'argmin', - 'argmax', - 'argsort', - 'ones', - 'zeros', - 'reverse', - 'has_inf', - 'has_nan', - 'isfinite', - 'range', + 'create_tensor', 'create_parameter', 'create_global_var', 'cast', + 'tensor_array_to_tensor', 'concat', 'sums', 'assign', + 'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax', + 'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite', + 'range', 'linspace' ] @@ -826,3 +811,45 @@ def range(start, end, step, dtype): 'Step': step}, outputs={'Out': [out]}) return out + + +def linspace(start, stop, num, dtype): + """ + Return fixed number of evenly spaced values within a given interval. + + First entry is start, and last entry is stop. In the case when Num is 1, only Start is returned. Like linspace function of numpy. + + Args: + start(float|Variable): First entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'. + stop(float|Variable): Last entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'. + num(int|Variable): Number of entry in the sequence. It is an int scalar, or a tensor of shape [1] with type int32. + dtype(string): 'float32'|'float64', the data type of the output tensor. + + Returns: + Variable: The tensor variable storing a 1-D tensor. + + Examples: + .. code-block:: python + + data = fluid.layers.linspace(0, 10, 5, 'float32') # [0.0, 2.5, 5.0, 7.5, 10.0] + data = fluid.layers.linspace(0, 10, 1, 'float32') # [0.0] + + """ + helper = LayerHelper("linspace", **locals()) + + if not isinstance(start, Variable): + start = fill_constant([1], dtype, start) + if not isinstance(stop, Variable): + stop = fill_constant([1], dtype, stop) + if not isinstance(num, Variable): + num = fill_constant([1], 'int32', num) + + out = helper.create_variable_for_type_inference(dtype=start.dtype) + + helper.append_op( + type='linspace', + inputs={'Start': start, + 'Stop': stop, + 'Num': num}, + outputs={'Out': [out]}) + return out diff --git a/python/paddle/fluid/tests/book/CMakeLists.txt b/python/paddle/fluid/tests/book/CMakeLists.txt index ee734f3c782adb5196a03aca5718377009a5b4e7..999a765b6dc32323a24f9069f11134360dbadcb8 100644 --- a/python/paddle/fluid/tests/book/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/CMakeLists.txt @@ -6,4 +6,6 @@ foreach(src ${TEST_OPS}) py_test(${src} SRCS ${src}.py) endforeach() -add_subdirectory(high-level-api) +if(WITH_HIGH_LEVEL_API_TEST) + add_subdirectory(high-level-api) +endif() diff --git a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt index efa5ee2d06af3d31e7d84122dd7eea37d6dcf3a3..c034709fbdc2aa315ca995a42c278b261e6283a4 100644 --- a/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt +++ b/python/paddle/fluid/tests/book/high-level-api/CMakeLists.txt @@ -1,16 +1,28 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") +file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*_new_api.py") string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() +# This test is buggy +# py_test(test_understand_sentiment_dynamic_rnn SRCS +# test_understand_sentiment_dynamic_rnn.py SERIAL) +LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn_new_api) -add_subdirectory(fit_a_line) -add_subdirectory(recognize_digits) -add_subdirectory(image_classification) -add_subdirectory(understand_sentiment) -add_subdirectory(label_semantic_roles) -add_subdirectory(word2vec) -add_subdirectory(recommender_system) -add_subdirectory(machine_translation) +if(NOT APPLE) + # default test + foreach(src ${TEST_OPS}) + py_test(${src} SRCS ${src}.py) + endforeach() +else() + foreach(src ${TEST_OPS}) + if(${src} STREQUAL "test_image_classification_vgg_new_api") + message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) + elseif(${src} STREQUAL "test_image_classification_resnet_new_api") + message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) + elseif(${src} STREQUAL "test_recognize_digits_conv_new_api") + message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) + elseif(${src} STREQUAL "test_recognize_digits_mlp_new_api") + message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) + elseif() + py_test(${src} SRCS ${src}.py) + endif() + endforeach() +endif() diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/cifar10_small_test_set.py b/python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/image_classification/cifar10_small_test_set.py rename to python/paddle/fluid/tests/book/high-level-api/cifar10_small_test_set.py diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/fit_a_line/CMakeLists.txt deleted file mode 100644 index 673c965b662a022739f8d489c331f4de9455a926..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt deleted file mode 100644 index 91c1d17eb5391ea37a41a886594cc71c6e6c56bd..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/image_classification/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -if(NOT APPLE) - # default test - foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) - endforeach() -else() - foreach(src ${TEST_OPS}) - if(${src} STREQUAL "test_image_classification_vgg") - message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) - elseif(${src} STREQUAL "test_image_classification_resnet") - message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) - elseif() - py_test(${src} SRCS ${src}.py) - endif() - endforeach() -endif() diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt deleted file mode 100644 index 673c965b662a022739f8d489c331f4de9455a926..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt deleted file mode 100644 index 673c965b662a022739f8d489c331f4de9455a926..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/machine_translation/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt deleted file mode 100644 index f9c6d60540fcb6f8a73fdc4e68471448e16cbdc2..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/CMakeLists.txt +++ /dev/null @@ -1,19 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -if(NOT APPLE) - foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) - endforeach() -else() - foreach(src ${TEST_OPS}) - if(${src} STREQUAL "test_recognize_digits_conv") - message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) - elseif(${src} STREQUAL "test_recognize_digits_mlp") - message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src}) - else() - py_test(${src} SRCS ${src}.py) - endif() - endforeach() -endif() diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt deleted file mode 100644 index 673c965b662a022739f8d489c331f4de9455a926..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/recommender_system/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py b/python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py rename to python/paddle/fluid/tests/book/high-level-api/test_fit_a_line_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_resnet_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py rename to python/paddle/fluid/tests/book/high-level-api/test_image_classification_resnet_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_vgg.py rename to python/paddle/fluid/tests/book/high-level-api/test_image_classification_vgg_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py b/python/paddle/fluid/tests/book/high-level-api/test_label_semantic_roles_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/test_label_semantic_roles_newapi.py rename to python/paddle/fluid/tests/book/high-level-api/test_label_semantic_roles_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py b/python/paddle/fluid/tests/book/high-level-api/test_machine_translation_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/machine_translation/test_machine_translation.py rename to python/paddle/fluid/tests/book/high-level-api/test_machine_translation_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_conv_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_conv.py rename to python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_conv_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_mlp_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/recognize_digits/test_recognize_digits_mlp.py rename to python/paddle/fluid/tests/book/high-level-api/test_recognize_digits_mlp_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py b/python/paddle/fluid/tests/book/high-level-api/test_recommender_system_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/recommender_system/test_recommender_system_newapi.py rename to python/paddle/fluid/tests/book/high-level-api/test_recommender_system_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_conv_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_conv.py rename to python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_conv_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_dynamic_rnn_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_dynamic_rnn.py rename to python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_dynamic_rnn_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_stacked_lstm_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/understand_sentiment/test_understand_sentiment_stacked_lstm.py rename to python/paddle/fluid/tests/book/high-level-api/test_understand_sentiment_stacked_lstm_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/test_word2vec_new_api.py similarity index 100% rename from python/paddle/fluid/tests/book/high-level-api/word2vec/test_word2vec_new_api.py rename to python/paddle/fluid/tests/book/high-level-api/test_word2vec_new_api.py diff --git a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt deleted file mode 100644 index d71147a85e77ea6dc5b6391aa169abd9b02a0aa1..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# This test is buggy -# py_test(test_understand_sentiment_dynamic_rnn SRCS -# test_understand_sentiment_dynamic_rnn.py SERIAL) -LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn) - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt b/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt deleted file mode 100644 index 673c965b662a022739f8d489c331f4de9455a926..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/book/high-level-api/word2vec/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py") -string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}") - -# default test -foreach(src ${TEST_OPS}) - py_test(${src} SRCS ${src}.py) -endforeach() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index cbe9afce035ea9918a41fafe3c2d4a3eb3f4dcb0..43ce20f2578bbf62a18ae694f6b121b64f33fbac 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -74,7 +74,6 @@ list(REMOVE_ITEM TEST_OPS test_dgc_op) list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl) list(REMOVE_ITEM TEST_OPS test_dist_transformer) list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer) -list(REMOVE_ITEM TEST_OPS test_image_classification_resnet) list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op) list(REMOVE_ITEM TEST_OPS test_nearest_interp_op) list(REMOVE_ITEM TEST_OPS test_imperative_resnet) @@ -125,10 +124,6 @@ if(NOT WIN32) py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL) endif() -if(NOT APPLE) - py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL) -endif() - if(CMAKE_BUILD_TYPE STREQUAL "Debug") # change the timeout from 600 to 2200, because in debug mode, this test need more time. set_tests_properties(test_parallel_executor_seresnext PROPERTIES TIMEOUT 2200) diff --git a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py index 18ed02a72275437fa6106e57c0383e17647d9700..723aafb171271ed248c93665a21089029a30a836 100644 --- a/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py +++ b/python/paddle/fluid/tests/unittests/parallel_executor_test_base.py @@ -29,7 +29,8 @@ __all__ = ['TestParallelExecutorBase'] class TestParallelExecutorBase(unittest.TestCase): - def check_network_convergence(self, + @classmethod + def check_network_convergence(cls, method, use_cuda=True, memory_opt=True, diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 38d0533a7ec820241c6a08f2180a7426984068f2..6630fb26aff9a8c570e65c34a753595da883bea1 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -1925,6 +1925,13 @@ class TestBook(LayerTest): out = layers.flatten(x, axis=1, name="flatten") return (out) + def test_linspace(self): + program = Program() + with program_guard(program): + out = layers.linspace(20, 10, 5, 'float64') + self.assertIsNotNone(out) + print(str(program)) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py new file mode 100644 index 0000000000000000000000000000000000000000..eeecf178320327cc251f32bfe46c1622200339f4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -0,0 +1,71 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np +from op_test import OpTest + + +class TestLinspaceOpCommonCase(OpTest): + def setUp(self): + self.op_type = "linspace" + dtype = 'float32' + self.inputs = { + 'Start': np.array([0]).astype(dtype), + 'Stop': np.array([10]).astype(dtype), + 'Num': np.array([11]).astype('int32') + } + + self.outputs = {'Out': np.arange(0, 11).astype(dtype)} + + def test_check_output(self): + self.check_output() + + +class TestLinspaceOpReverseCase(OpTest): + def setUp(self): + self.op_type = "linspace" + dtype = 'float32' + self.inputs = { + 'Start': np.array([10]).astype(dtype), + 'Stop': np.array([0]).astype(dtype), + 'Num': np.array([11]).astype('int32') + } + + self.outputs = {'Out': np.arange(10, -1, -1).astype(dtype)} + + def test_check_output(self): + self.check_output() + + +class TestLinspaceOpNumOneCase(OpTest): + def setUp(self): + self.op_type = "linspace" + dtype = 'float32' + self.inputs = { + 'Start': np.array([10]).astype(dtype), + 'Stop': np.array([0]).astype(dtype), + 'Num': np.array([1]).astype('int32') + } + + self.outputs = {'Out': np.array(10, dtype=dtype)} + + def test_check_output(self): + self.check_output() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py index 12d854fb54ac30ff2eeed97c16a78198d92387fd..92a5c58c11773e97ca0bb5ff2c21cbc8df612d58 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_seresnext.py @@ -29,7 +29,7 @@ import unittest import math import numpy as np from functools import partial - +os.environ['CPU_NUM'] = str(4) # FIXME(zcd): If the neural net has dropout_op, the output of ParallelExecutor # and Executor is different. Because, for ParallelExecutor, the dropout_op of # the neural net will be copied N copies(N is the number of device). This will @@ -113,7 +113,6 @@ def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio): return fluid.layers.elementwise_add(x=short, y=scale, act='relu') -batch_size = 12 img_shape = [3, 224, 224] @@ -181,43 +180,84 @@ def optimizer(learning_rate=0.01): return optimizer +def _batch_size(): + return 12 + + +def _iter(use_cuda): + if use_cuda: + return 10 + return 2 + + +gpu_img, gpu_label = init_data( + batch_size=_batch_size(), img_shape=img_shape, label_range=999) +cpu_img, cpu_label = init_data( + batch_size=_batch_size(), img_shape=img_shape, label_range=999) +feed_dict_gpu = {"image": gpu_img, "label": gpu_label} +feed_dict_cpu = {"image": cpu_img, "label": cpu_label} +model = SE_ResNeXt50Small + + +def _feed_dict(use_cuda): + if use_cuda: + return feed_dict_gpu + return feed_dict_cpu + + +def _get_result_of_origin_model(use_cuda): + global remove_bn + global remove_dropout + remove_bn = True + remove_dropout = True + first_loss, last_loss = TestParallelExecutorBase.check_network_convergence( + model, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), + use_cuda=use_cuda, + use_reduce=False, + optimizer=optimizer) + + return first_loss, last_loss + + +origin_cpu_first_loss, origin_cpu_last_loss = _get_result_of_origin_model(False) +if core.is_compiled_with_cuda(): + origin_gpu_first_loss, origin_gpu_last_loss = _get_result_of_origin_model( + True) + + +def _get_origin_result(use_cuda): + if use_cuda: + assert core.is_compiled_with_cuda(), "Doesn't compiled with CUDA." + return origin_gpu_first_loss, origin_gpu_last_loss + return origin_cpu_first_loss, origin_cpu_last_loss + + class TestResnet(TestParallelExecutorBase): - @classmethod - def setUpClass(cls): - os.environ['CPU_NUM'] = str(4) - global remove_dropout - global remove_bn - remove_dropout = False - remove_bn = False - - def _compare_reduce_and_allreduce(self, - model, - use_cuda, - iter=20, - delta2=1e-5): + def _compare_reduce_and_allreduce(self, use_cuda, delta2=1e-5): if use_cuda and not core.is_compiled_with_cuda(): return global remove_bn + global remove_dropout remove_bn = True + remove_dropout = True - img, label = init_data( - batch_size=batch_size, img_shape=img_shape, label_range=999) all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), use_cuda=use_cuda, use_reduce=False, optimizer=optimizer) reduce_first_loss, reduce_last_loss = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), use_cuda=use_cuda, use_reduce=True, optimizer=optimizer) @@ -232,10 +272,9 @@ class TestResnet(TestParallelExecutorBase): all_reduce_first_loss_seq, all_reduce_last_loss_seq = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), use_cuda=use_cuda, use_reduce=False, optimizer=optimizer, @@ -243,10 +282,9 @@ class TestResnet(TestParallelExecutorBase): reduce_first_loss_seq, reduce_last_loss_seq = self.check_network_convergence( model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), use_cuda=use_cuda, use_reduce=True, optimizer=optimizer, @@ -267,37 +305,28 @@ class TestResnet(TestParallelExecutorBase): for loss in zip(all_reduce_last_loss_seq, reduce_last_loss_seq): self.assertAlmostEquals(loss[0], loss[1], delta=delta2) - def _check_resnet_convergence(self, - model, - check_func_1, - check_func_2, - use_cuda, - iter=20, - delta2=1e-5, - compare_seperately=True): + def _compare_result_with_origin_model(self, + get_origin_result, + check_func_2, + use_cuda, + delta2=1e-5, + compare_seperately=True, + rm_drop_out=False, + rm_bn=False): if use_cuda and not core.is_compiled_with_cuda(): return - global remove_dropout global remove_bn - remove_dropout = True - remove_bn = True + global remove_dropout + remove_bn = rm_bn or use_cuda + remove_dropout = rm_drop_out - img, label = init_data( - batch_size=batch_size, img_shape=img_shape, label_range=999) - func_1_first_loss, func_1_last_loss = check_func_1( - model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, - use_cuda=use_cuda) + func_1_first_loss, func_1_last_loss = get_origin_result(use_cuda) func_2_first_loss, func_2_last_loss = check_func_2( model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, + feed_dict=_feed_dict(use_cuda), + iter=_iter(use_cuda), + batch_size=_batch_size(), use_cuda=use_cuda) if compare_seperately: @@ -311,97 +340,55 @@ class TestResnet(TestParallelExecutorBase): self.assertAlmostEquals( np.mean(func_1_last_loss), func_2_last_loss[0], delta=delta2) - def _compare_with_fused_all_reduce(self, - model, - use_cuda, - iter=20, - delta2=1e-5): - if use_cuda and not core.is_compiled_with_cuda(): - return - - global remove_bn - remove_bn = True - - img, label = init_data( - batch_size=batch_size, img_shape=img_shape, label_range=999) - all_reduce_first_loss, all_reduce_last_loss = self.check_network_convergence( - model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, - use_cuda=use_cuda, - fuse_all_reduce_ops=False, - optimizer=optimizer) - reduce_first_loss, reduce_last_loss = self.check_network_convergence( - model, - feed_dict={"image": img, - "label": label}, - iter=iter, - batch_size=batch_size, - use_cuda=use_cuda, - fuse_all_reduce_ops=True, - optimizer=optimizer) - - for loss in zip(all_reduce_first_loss, reduce_first_loss): - self.assertAlmostEquals(loss[0], loss[1], delta=1e-5) - for loss in zip(all_reduce_last_loss, reduce_last_loss): - self.assertAlmostEquals(loss[0], loss[1], delta=delta2) - def test_seresnext_with_reduce(self): - self._compare_reduce_and_allreduce( - model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-2) - self._compare_reduce_and_allreduce( - model=SE_ResNeXt50Small, use_cuda=False, iter=5) - - def test_seresnext_with_fused_all_reduce(self): - self._compare_with_fused_all_reduce( - model=SE_ResNeXt50Small, use_cuda=True, delta2=1e-3) - self._compare_with_fused_all_reduce( - model=SE_ResNeXt50Small, use_cuda=False, iter=2, delta2=1e-3) + self._compare_reduce_and_allreduce(use_cuda=False, delta2=1e-3) + self._compare_reduce_and_allreduce(use_cuda=True, delta2=1e-2) def test_seresnext_with_learning_rate_decay(self): - check_func_1 = partial( - self.check_network_convergence, - optimizer=optimizer, - use_parallel_executor=True) + # NOTE(zcd): This test is compare the result of use parallel_executor and executor, + # and the result of drop_out op and batch_norm op in this two executor + # have diff, so the two ops should be removed from the model. + check_func_1 = _get_origin_result check_func_2 = partial( self.check_network_convergence, optimizer=optimizer, use_parallel_executor=False) - self._check_resnet_convergence( - SE_ResNeXt50Small, - check_func_1, - check_func_2, - use_cuda=True, - compare_seperately=False) - self._check_resnet_convergence( - SE_ResNeXt50Small, + self._compare_result_with_origin_model( check_func_1, check_func_2, use_cuda=False, + rm_drop_out=True, + rm_bn=True, compare_seperately=False, - iter=2, delta2=1e-3) + self._compare_result_with_origin_model( + check_func_1, + check_func_2, + use_cuda=True, + rm_drop_out=True, + rm_bn=True, + compare_seperately=False) - def test_seresnext_with_fused_optimizer_ops(self): - check_func_1 = partial( - self.check_network_convergence, fuse_all_optimizer_ops=False) + def test_seresnext_with_fused_all_reduce(self): + # NOTE(zcd): In order to make the program faster, + # this unit test remove drop_out and batch_norm. + check_func_1 = _get_origin_result check_func_2 = partial( - self.check_network_convergence, fuse_all_optimizer_ops=True) - # TODO(zcd): this test failed random, I will fix it in next PR. - # self._check_resnet_convergence( - # SE_ResNeXt50Small, - # check_func_1, - # check_func_2, - # use_cuda=True, - # delta2=1e-3) - self._check_resnet_convergence( - SE_ResNeXt50Small, + self.check_network_convergence, + optimizer=optimizer, + fuse_all_reduce_ops=True) + self._compare_result_with_origin_model( check_func_1, check_func_2, use_cuda=False, - iter=2, + rm_drop_out=True, + rm_bn=True) + self._compare_result_with_origin_model( + check_func_1, + check_func_2, + use_cuda=True, + rm_drop_out=True, + rm_bn=True, delta2=1e-3) diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index 8fc8125a773543eea768783155ad152c475535b5..65fc1453d8db13ad9c85746c3bf148f898e8f788 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -91,6 +91,78 @@ class TestProdOp(OpTest): self.check_grad(['X'], 'Out') +class TestAllOp(OpTest): + def setUp(self): + self.op_type = "reduce_all" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.outputs = {'Out': self.inputs['X'].all()} + self.attrs = {'reduce_all': True} + + def test_check_output(self): + self.check_output() + + +class TestAllOpWithDim(OpTest): + def setUp(self): + self.op_type = "reduce_all" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.attrs = {'dim': [1]} + self.outputs = {'Out': self.inputs['X'].all(axis=1)} + + def test_check_output(self): + self.check_output() + + +class TestAllOpWithKeepDim(OpTest): + def setUp(self): + self.op_type = "reduce_all" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.attrs = {'dim': [1], 'keep_dim': True} + self.outputs = { + 'Out': np.expand_dims( + self.inputs['X'].all(axis=1), axis=1) + } + + def test_check_output(self): + self.check_output() + + +class TestAnyOp(OpTest): + def setUp(self): + self.op_type = "reduce_any" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.outputs = {'Out': self.inputs['X'].any()} + self.attrs = {'reduce_all': True} + + def test_check_output(self): + self.check_output() + + +class TestAnyOpWithDim(OpTest): + def setUp(self): + self.op_type = "reduce_any" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.attrs = {'dim': [1]} + self.outputs = {'Out': self.inputs['X'].any(axis=1)} + + def test_check_output(self): + self.check_output() + + +class TestAnyOpWithKeepDim(OpTest): + def setUp(self): + self.op_type = "reduce_any" + self.inputs = {'X': np.random.randint(0, 2, (5, 6, 10)).astype("bool")} + self.attrs = {'dim': [1], 'keep_dim': True} + self.outputs = { + 'Out': np.expand_dims( + self.inputs['X'].any(axis=1), axis=1) + } + + def test_check_output(self): + self.check_output() + + class Test1DReduce(OpTest): def setUp(self): self.op_type = "reduce_sum"