提交 75994078 编写于 作者: H heqiaozhi

Merge remote-tracking branch 'upstream/develop' into dev

test=develop
......@@ -71,7 +71,8 @@ option(ANAKIN_BUILD_CROSS_PLANTFORM "Build anakin lib for any nvidia device plan
option(WITH_GRPC "Use grpc as the default rpc framework" ${WITH_DISTRIBUTE})
option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
option(ON_INFER "Turn on inference optimization." OFF)
option(WITH_INFERENCE_API_TEST "Test fluid inference high-level api interface" OFF)
option(WITH_INFERENCE_API_TEST "Test fluid inference C++ high-level api interface" OFF)
option(WITH_HIGH_LEVEL_API_TEST "Test fluid python high-level api interface" OFF)
option(WITH_SYSTEM_BLAS "Use system blas library" OFF)
option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
......
......@@ -221,6 +221,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=lib
-DBUILD_SHARED_LIBS=OFF
CMAKE_CACHE_ARGS
-DCMAKE_INSTALL_PREFIX:PATH=${PROTOBUF_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
......
......@@ -13,6 +13,7 @@ paddle.fluid.name_scope (ArgSpec(args=['prefix'], varargs=None, keywords=None, d
paddle.fluid.cuda_places (ArgSpec(args=['device_ids'], varargs=None, keywords=None, defaults=(None,)), ('document', '7d9a51fc9cf3c5245b5227080a8064c3'))
paddle.fluid.cpu_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', '4c0cd83f0b401fc2ff84c70974e5d210'))
paddle.fluid.cuda_pinned_places (ArgSpec(args=['device_count'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd0c3ebd813c39958c92b78e3eef7e912'))
paddle.fluid.in_dygraph_mode (ArgSpec(args=[], varargs=None, keywords=None, defaults=None), ('document', 'f06314a1cb30c96b5808dde2219c2dae'))
paddle.fluid.Executor.__init__ (ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.Executor.close (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', 'f5369953dd0c443961cf79f7a00e1a03'))
paddle.fluid.Executor.infer_from_dataset (ArgSpec(args=['self', 'program', 'dataset', 'scope', 'thread', 'debug', 'fetch_list', 'fetch_info', 'print_period'], varargs=None, keywords=None, defaults=(None, None, None, 0, False, None, None, 100)), ('document', '9c7decb955b9c4f718114179c8985581'))
......@@ -117,6 +118,8 @@ paddle.fluid.layers.reduce_mean (ArgSpec(args=['input', 'dim', 'keep_dim', 'name
paddle.fluid.layers.reduce_max (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '66a622db727551761ce4eb73eaa7f6a4'))
paddle.fluid.layers.reduce_min (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'd50ac552b5d131468ed466d08bb2d38c'))
paddle.fluid.layers.reduce_prod (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'fcd8301a0ce15f219c7a4bcd0c1e8eca'))
paddle.fluid.layers.reduce_all (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', '646ca4d4a2cc16084f59de44b6927eca'))
paddle.fluid.layers.reduce_any (ArgSpec(args=['input', 'dim', 'keep_dim', 'name'], varargs=None, keywords=None, defaults=(None, False, None)), ('document', 'f36661060aeeaf6c6b1331e41b3726fa'))
paddle.fluid.layers.sequence_first_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '2b290d3d77882bfe9bb8d331cac8cdd3'))
paddle.fluid.layers.sequence_last_step (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'c16a892f44f7fe71bfa5afc32d3f34ce'))
paddle.fluid.layers.sequence_slice (ArgSpec(args=['input', 'offset', 'length', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'fdcea0e8b5bc7d8d4b1b072c521014e6'))
......@@ -124,7 +127,7 @@ paddle.fluid.layers.dropout (ArgSpec(args=['x', 'dropout_prob', 'is_test', 'seed
paddle.fluid.layers.split (ArgSpec(args=['input', 'num_or_sections', 'dim', 'name'], varargs=None, keywords=None, defaults=(-1, None)), ('document', '652625345c2acb900029c78cc75f8aa6'))
paddle.fluid.layers.ctc_greedy_decoder (ArgSpec(args=['input', 'blank', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'ebbf2adbd79683dc93db03454dfa18c2'))
paddle.fluid.layers.edit_distance (ArgSpec(args=['input', 'label', 'normalized', 'ignored_tokens'], varargs=None, keywords=None, defaults=(True, None)), ('document', '97f0262f97602644c83142789d784571'))
paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '6e428384ce6a77207fa2c70d9f011990'))
paddle.fluid.layers.l2_normalize (ArgSpec(args=['x', 'axis', 'epsilon', 'name'], varargs=None, keywords=None, defaults=(1e-12, None)), ('document', '35c6a241bcc1a1fc89508860d82ad62b'))
paddle.fluid.layers.matmul (ArgSpec(args=['x', 'y', 'transpose_x', 'transpose_y', 'alpha', 'name'], varargs=None, keywords=None, defaults=(False, False, 1.0, None)), ('document', 'b4cbe1ac451005df6dad12e9ffdccca9'))
paddle.fluid.layers.topk (ArgSpec(args=['input', 'k', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', 'd3570c02f71bcd78e60b3f31dc8f5b32'))
paddle.fluid.layers.warpctc (ArgSpec(args=['input', 'label', 'blank', 'norm_by_times', 'use_cudnn'], varargs=None, keywords=None, defaults=(0, False, False)), ('document', 'aaba49c038ba927f0a8e45c0c9a686ab'))
......@@ -155,10 +158,10 @@ paddle.fluid.layers.label_smooth (ArgSpec(args=['label', 'prior_dist', 'epsilon'
paddle.fluid.layers.roi_pool (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale'], varargs=None, keywords=None, defaults=(1, 1, 1.0)), ('document', 'c317aa595deb31649083c8faa91cdb97'))
paddle.fluid.layers.roi_align (ArgSpec(args=['input', 'rois', 'pooled_height', 'pooled_width', 'spatial_scale', 'sampling_ratio', 'name'], varargs=None, keywords=None, defaults=(1, 1, 1.0, -1, None)), ('document', '12c5bbb8b38c42e623fbc47611d766e1'))
paddle.fluid.layers.dice_loss (ArgSpec(args=['input', 'label', 'epsilon'], varargs=None, keywords=None, defaults=(1e-05,)), ('document', '1ba0508d573f65feecf3564dce22aa1d'))
paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', '7a1966d7c3a48f1fc0881cdaf5d83b0b'))
paddle.fluid.layers.image_resize (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'resample', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, 'BILINEAR', None, True, 1)), ('document', 'd1b08c11bb9277386fcf6ae70b6622d1'))
paddle.fluid.layers.image_resize_short (ArgSpec(args=['input', 'out_short_len', 'resample'], varargs=None, keywords=None, defaults=('BILINEAR',)), ('document', '06211aefc50c5a3e940d7204d859cdf7'))
paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'e4fb4ed511b2293b8f04f7e872afbfd7'))
paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', '735fa9758a6d7ff3b47d7b827f961c1d'))
paddle.fluid.layers.resize_bilinear (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners', 'align_mode'], varargs=None, keywords=None, defaults=(None, None, None, None, True, 1)), ('document', 'c45591fbc4f64a178fbca219e1546a58'))
paddle.fluid.layers.resize_nearest (ArgSpec(args=['input', 'out_shape', 'scale', 'name', 'actual_shape', 'align_corners'], varargs=None, keywords=None, defaults=(None, None, None, None, True)), ('document', 'ae6d73cdc7f3a138d8a338ecdb33c1ae'))
paddle.fluid.layers.gather (ArgSpec(args=['input', 'index'], varargs=None, keywords=None, defaults=None), ('document', '98f1c86716b9b7f4dda83f20e2adeee2'))
paddle.fluid.layers.scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '65f8e9d8ddfd0b412f940579c4faa342'))
paddle.fluid.layers.sequence_scatter (ArgSpec(args=['input', 'index', 'updates', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '15b522457dfef103f0c20ca9d397678b'))
......@@ -203,6 +206,7 @@ paddle.fluid.layers.gaussian_random_batch_size_like (ArgSpec(args=['input', 'sha
paddle.fluid.layers.sum (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', 'a418e3ccb5e2ac21bd60f5cc221d5860'))
paddle.fluid.layers.slice (ArgSpec(args=['input', 'axes', 'starts', 'ends'], varargs=None, keywords=None, defaults=None), ('document', '01dbb91e7c74cb11336cd531013de51a'))
paddle.fluid.layers.shape (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', '17db0f814eb7bb5a3fac1ca6e60e16d8'))
paddle.fluid.layers.rank (ArgSpec(args=['input'], varargs=None, keywords=None, defaults=None), ('document', 'ee1386c42ecc8f424fe3fb21862fefc2'))
paddle.fluid.layers.logical_and (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'cdcf20c494c92060d10feb9374532f42'))
paddle.fluid.layers.logical_or (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', '0eae3f726a4afe590757552fa3ced012'))
paddle.fluid.layers.logical_xor (ArgSpec(args=['x', 'y', 'out', 'name'], varargs=None, keywords=None, defaults=(None, None)), ('document', 'b0daaa3fa4a0aa62f9b58c43d959eb25'))
......@@ -235,7 +239,7 @@ paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], vararg
paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec'))
paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329'))
paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', 'ad669cdf83e72a69ebc5ed79e36486de'))
paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', '731b21c62a4add60a33bd76d802ffc5c'))
paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
paddle.fluid.layers.continuous_value_model (ArgSpec(args=['input', 'cvm', 'use_cvm'], varargs=None, keywords=None, defaults=(True,)), ('document', '88046160ef4bbd28f18fa6484d95b75c'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
......@@ -272,6 +276,7 @@ paddle.fluid.layers.has_inf (ArgSpec(args=['x'], varargs=None, keywords=None, de
paddle.fluid.layers.has_nan (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '2e53e83127dbfd86e7098bdfe9a549e8'))
paddle.fluid.layers.isfinite (ArgSpec(args=['x'], varargs=None, keywords=None, defaults=None), ('document', '0a437011c3906079fd8947ed3e52d292'))
paddle.fluid.layers.range (ArgSpec(args=['start', 'end', 'step', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '2ec937ede953ded2fdff2675883900bb'))
paddle.fluid.layers.linspace (ArgSpec(args=['start', 'stop', 'num', 'dtype'], varargs=None, keywords=None, defaults=None), ('document', '495e21e9a848c2d075a102802fc67756'))
paddle.fluid.layers.While.__init__ (ArgSpec(args=['self', 'cond', 'is_test', 'name'], varargs=None, keywords=None, defaults=(False, None)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.While.block (ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.layers.Switch.__init__ (ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......@@ -361,8 +366,7 @@ paddle.fluid.layers.inverse_time_decay (ArgSpec(args=['learning_rate', 'decay_st
paddle.fluid.layers.polynomial_decay (ArgSpec(args=['learning_rate', 'decay_steps', 'end_learning_rate', 'power', 'cycle'], varargs=None, keywords=None, defaults=(0.0001, 1.0, False)), ('document', '882634f420f626642f0874481263da40'))
paddle.fluid.layers.piecewise_decay (ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None), ('document', 'c717d9d1d78a53c809d01b8bc56f3cae'))
paddle.fluid.layers.noam_decay (ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None), ('document', 'd9a95746353fd574be36dc28d8726c28'))
paddle.fluid.layers.append_LARS (ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None), ('document', 'd24fa1e7d62ac8a534fc6a86002f84f8'))
paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', '9588c64c26ffaef3c466e404a6af9d9b'))
paddle.fluid.layers.cosine_decay (ArgSpec(args=['learning_rate', 'step_each_epoch', 'epochs'], varargs=None, keywords=None, defaults=None), ('document', 'f8b2727bccf0f368c997d7cf05847e49'))
paddle.fluid.layers.linear_lr_warmup (ArgSpec(args=['learning_rate', 'warmup_steps', 'start_lr', 'end_lr'], varargs=None, keywords=None, defaults=None), ('document', '2ef3f5ca5cd71ea4217c418e5a7a0565'))
paddle.fluid.contrib.InitState.__init__ (ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
paddle.fluid.contrib.StateCell.__init__ (ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)), ('document', '6adf97f83acf6453d4a6a4b1070f3754'))
......
......@@ -72,7 +72,6 @@ bool DataFeed::PickOneFile(std::string* filename) {
}
VLOG(3) << "file_idx_=" << *file_idx_;
*filename = filelist_[(*file_idx_)++];
// LOG(ERROR) << "pick file:" << *filename;
return true;
}
......@@ -466,6 +465,17 @@ void MultiSlotDataFeed::Init(
if (slot.is_used()) {
use_slots_.push_back(all_slots_[i]);
use_slots_is_dense_.push_back(slot.is_dense());
std::vector<int> local_shape;
if (slot.is_dense()) {
// for batch size holder if is_dense
if (slot.shape(0) > 0) {
local_shape.push_back(0);
}
}
for (size_t i = 0; i < slot.shape_size(); ++i) {
local_shape.push_back(slot.shape(i));
}
use_slots_shape_.push_back(local_shape);
}
}
feed_vec_.resize(use_slots_.size());
......@@ -752,8 +762,8 @@ void MultiSlotDataFeed::PutToFeedVec(
LoD data_lod{offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
int dim = total_instance / batch_size_;
feed_vec_[i]->Resize({batch_size_, dim});
use_slots_shape_[i][0] = batch_size_;
feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
}
}
#endif
......@@ -785,6 +795,16 @@ void MultiSlotInMemoryDataFeed::Init(
if (slot.is_used()) {
use_slots_.push_back(all_slots_[i]);
use_slots_is_dense_.push_back(slot.is_dense());
std::vector<int> local_shape;
if (slot.is_dense()) {
if (slot.shape(0) > 0) {
local_shape.push_back(0);
}
}
for (size_t i = 0; i < slot.shape_size(); ++i) {
local_shape.push_back(slot.shape(i));
}
use_slots_shape_.push_back(local_shape);
}
}
feed_vec_.resize(use_slots_.size());
......@@ -940,8 +960,8 @@ void MultiSlotInMemoryDataFeed::PutToFeedVec(
LoD data_lod{offset};
feed_vec_[i]->set_lod(data_lod);
if (use_slots_is_dense_[i]) {
int dim = total_instance / batch_size_;
feed_vec_[i]->Resize({batch_size_, dim});
use_slots_shape_[i][0] = batch_size_;
feed_vec_[i]->Resize(framework::make_ddim(use_slots_shape_[i]));
}
}
#endif
......
......@@ -142,6 +142,7 @@ class DataFeed {
// object)
std::vector<std::string> all_slots_;
std::vector<std::string> all_slots_type_;
std::vector<std::vector<int>> use_slots_shape_;
std::vector<int>
use_slots_index_; // -1: not used; >=0: the index of use_slots_
......
......@@ -19,6 +19,7 @@ message Slot {
required string type = 2;
optional bool is_dense = 3 [ default = false ];
optional bool is_used = 4 [ default = false ];
repeated int32 shape = 5; // we can define N-D Tensor
}
message MultiSlotDesc { repeated Slot slots = 1; }
......
......@@ -150,6 +150,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
AppendPass("runtime_context_cache_pass");
}
if (strategy_.cache_expected_kernel_) {
VLOG(10) << "Add expected_kernel_cache_pass";
AppendPass("expected_kernel_cache_pass");
}
AppendMultiDevPass(strategy_);
if (strategy_.fuse_all_reduce_ops_) {
......@@ -337,3 +342,4 @@ USE_PASS(fuse_adam_op_pass);
USE_PASS(fuse_sgd_op_pass);
USE_PASS(fuse_all_reduce_op_pass);
USE_PASS(runtime_context_cache_pass);
USE_PASS(expected_kernel_cache_pass);
......@@ -83,11 +83,11 @@ struct BuildStrategy {
bool sync_batch_norm_{false};
bool memory_optimize_{true};
// TODO(dzhwinter):
// make enable_inplace, memory_optimize_
// memory_early_delete_ true by default
bool enable_inplace_{true};
// FIXME(liuwei1031) disable memory_optimzie and enable_inplace in 1.4
// to open them by default, we need to solve the fetch variable issue
bool memory_optimize_{false};
bool enable_inplace_{false};
bool enable_sequential_execution_{false};
......@@ -108,6 +108,7 @@ struct BuildStrategy {
bool remove_unnecessary_lock_{true};
bool cache_runtime_context_{false};
bool cache_expected_kernel_{true};
// NOTE:
// Before you add new options, think if it's a general strategy that works
......
......@@ -305,6 +305,12 @@ void InplacePass::TryInplaceOpInputOutput(ir::Node* op,
VLOG(4) << "Try to inplace " << in_var_name << " with " << out_var_name;
if (var_nodes_[in_var_name].back() != in_node) {
VLOG(4) << "SKIP since " << in_var_name
<< " is also used as output by other ops";
continue;
}
bool can_replace = true;
if (in_var_name == out_var_name) {
can_replace = false;
......@@ -527,6 +533,9 @@ void GraphView::Build(ir::Graph* g) {
};
for (auto& node : g->Nodes()) {
if (!node->IsOp()) continue;
// avoid optimize the variable used in sub-blocks
if (OpHasSubBlock(node->Op())) update_skip_set(node);
if (node->Name() == "send") update_skip_set(node);
if (node->Name() == "recv") update_skip_set(node);
if (node->Name() == "prefetch") update_skip_set(node);
......
......@@ -233,6 +233,12 @@ struct OpInfoFiller<T, kNoNeedBufferVarsInference> {
}
};
// A fake OpInfoFiller of void
template <>
struct OpInfoFiller<void, kUnknown> {
void operator()(const char* op_type, OpInfo* info) const {}
};
} // namespace details
} // namespace framework
......
......@@ -21,40 +21,40 @@ namespace framework {
void DownpourWorker::Initialize(const TrainerDesc& desc) {
param_ = desc.downpour_param();
for (size_t i = 0; i < param_.sparse_table_size(); ++i) {
for (int i = 0; i < param_.sparse_table_size(); ++i) {
uint64_t table_id =
static_cast<uint64_t>(param_.sparse_table(i).table_id());
TableParameter table = param_.sparse_table(i);
sparse_key_names_[table_id].resize(table.sparse_key_name_size());
for (size_t j = 0; j < table.sparse_key_name_size(); ++j) {
for (int j = 0; j < table.sparse_key_name_size(); ++j) {
sparse_key_names_[table_id][j] = table.sparse_key_name(j);
}
sparse_value_names_[table_id].resize(table.sparse_value_name_size());
for (size_t j = 0; j < table.sparse_value_name_size(); ++j) {
for (int j = 0; j < table.sparse_value_name_size(); ++j) {
sparse_value_names_[table_id][j] = table.sparse_value_name(j);
}
sparse_grad_names_[table_id].resize(table.sparse_grad_name_size());
for (size_t j = 0; j < table.sparse_grad_name_size(); ++j) {
for (int j = 0; j < table.sparse_grad_name_size(); ++j) {
sparse_grad_names_[table_id][j] = table.sparse_grad_name(j);
}
label_var_name_[table_id] = table.label_var_name();
}
for (size_t i = 0; i < param_.dense_table_size(); ++i) {
for (int i = 0; i < param_.dense_table_size(); ++i) {
uint64_t table_id = static_cast<uint64_t>(param_.dense_table(i).table_id());
auto table = param_.dense_table(i);
dense_value_names_[table_id].resize(table.dense_value_name_size());
for (size_t j = 0; j < table.dense_value_name_size(); ++j) {
for (int j = 0; j < table.dense_value_name_size(); ++j) {
dense_value_names_[table_id][j] = table.dense_value_name(j);
}
dense_grad_names_[table_id].resize(table.dense_grad_name_size());
for (size_t j = 0; j < table.dense_grad_name_size(); ++j) {
for (int j = 0; j < table.dense_grad_name_size(); ++j) {
dense_grad_names_[table_id][j] = table.dense_grad_name(j);
}
}
skip_ops_.resize(param_.skip_ops_size());
for (size_t i = 0; i < param_.skip_ops_size(); ++i) {
for (int i = 0; i < param_.skip_ops_size(); ++i) {
skip_ops_[i] = param_.skip_ops(i);
}
......@@ -83,14 +83,14 @@ void DownpourWorker::CollectLabelInfo(size_t table_idx) {
LoDTensor* tensor = var->GetMutable<LoDTensor>();
int64_t* label_ptr = tensor->data<int64_t>();
int global_index = 0;
size_t global_index = 0;
for (size_t i = 0; i < sparse_key_names_[table_id].size(); ++i) {
VLOG(3) << "sparse_key_names_[" << i
<< "]: " << sparse_key_names_[table_id][i];
Variable* fea_var = thread_scope_->FindVar(sparse_key_names_[table_id][i]);
LoDTensor* tensor = fea_var->GetMutable<LoDTensor>();
int64_t* ids = tensor->data<int64_t>();
int fea_idx = 0;
size_t fea_idx = 0;
// tensor->lod()[0].size() == batch_size + 1
for (auto lod_idx = 1u; lod_idx < tensor->lod()[0].size(); ++lod_idx) {
for (; fea_idx < tensor->lod()[0][lod_idx]; ++fea_idx) {
......@@ -138,7 +138,7 @@ void DownpourWorker::FillSparseValue(size_t table_idx) {
auto& tensor_lod = tensor->lod()[0];
LoD data_lod{tensor_lod};
tensor_emb->set_lod(data_lod);
for (auto index = 0u; index < len; ++index) {
for (int index = 0; index < len; ++index) {
if (ids[index] == 0u) {
memcpy(ptr + table.emb_dim() * index, init_value.data() + 2,
sizeof(float) * table.emb_dim());
......@@ -192,7 +192,7 @@ void DownpourWorker::TrainFilesWithProfiler() {
read_time += timeline.ElapsedSec();
total_time += timeline.ElapsedSec();
VLOG(3) << "program config size: " << param_.program_config_size();
for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).pull_sparse_table_id(i));
......@@ -244,8 +244,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
}
if (need_to_push_sparse_) {
for (size_t i = 0;
i < param_.program_config(0).push_sparse_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_sparse_table_id(i));
TableParameter table;
......@@ -268,8 +268,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
if (need_to_push_dense_) {
timeline.Start();
for (size_t i = 0;
i < param_.program_config(0).push_dense_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_dense_table_id(i));
fleet_ptr_->PushDenseVarsAsync(
......@@ -315,8 +315,8 @@ void DownpourWorker::TrainFilesWithProfiler() {
}
if (need_to_push_dense_) {
for (size_t i = 0;
i < param_.program_config(0).push_dense_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_dense_table_id(i));
pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid);
......@@ -362,7 +362,7 @@ void DownpourWorker::TrainFiles() {
int cur_batch;
while ((cur_batch = device_reader_->Next()) > 0) {
// pull sparse here
for (size_t i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
for (int i = 0; i < param_.program_config(0).pull_sparse_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).pull_sparse_table_id(i));
......@@ -397,8 +397,8 @@ void DownpourWorker::TrainFiles() {
if (need_to_push_sparse_) {
// push gradients here
for (size_t i = 0;
i < param_.program_config(0).push_sparse_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_sparse_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_sparse_table_id(i));
TableParameter table;
......@@ -416,8 +416,8 @@ void DownpourWorker::TrainFiles() {
}
if (need_to_push_dense_) {
for (size_t i = 0;
i < param_.program_config(0).push_dense_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_dense_table_id(i));
fleet_ptr_->PushDenseVarsAsync(
......@@ -461,8 +461,8 @@ void DownpourWorker::TrainFiles() {
}
if (need_to_push_dense_) {
for (size_t i = 0;
i < param_.program_config(0).push_dense_table_id_size(); ++i) {
for (int i = 0; i < param_.program_config(0).push_dense_table_id_size();
++i) {
uint64_t tid = static_cast<uint64_t>(
param_.program_config(0).push_dense_table_id(i));
pull_dense_worker_->IncreaseThreadVersion(thread_id_, tid);
......
......@@ -23,7 +23,7 @@ namespace ir {
void ExpectedKernelCachePass::ApplyImpl(ir::Graph* graph) const {
VLOG(3) << "Applies Expected Kernel Cache strategy.";
for (const Node* n : graph->Nodes()) {
if (n->IsOp()) {
if (n->IsOp() && n->Op()) {
n->Op()->SetAttr(kEnableCacheExpectedKernel, true);
}
}
......
......@@ -31,10 +31,10 @@ namespace paddle {
namespace framework {
namespace ir {
namespace {
void SortHelper(
const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
ir::Node *node, std::unordered_set<ir::Node *> *visited,
std::vector<ir::Node *> *ret) {
void SortHelper(const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>,
ir::NodeComp> &adj_list,
ir::Node *node, std::unordered_set<ir::Node *> *visited,
std::vector<ir::Node *> *ret) {
visited->insert(node);
for (auto adj : adj_list.at(node)) {
......@@ -50,7 +50,8 @@ void SortHelper(
bool HasCircleHelper(
ir::Node *node,
const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
&adj_list,
std::unordered_set<ir::Node *> *visited,
std::unordered_set<ir::Node *> *in_trace,
std::vector<std::vector<ir::Node *>> *circles) {
......@@ -84,7 +85,8 @@ bool HasCircleHelper(
}
bool HasCircleInternal(
const std::map<ir::Node *, std::unordered_set<ir::Node *>> &adj_list,
const std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
&adj_list,
std::vector<std::vector<ir::Node *>> *circles) {
std::unordered_set<ir::Node *> visited;
std::unordered_set<ir::Node *> in_trace;
......@@ -107,8 +109,8 @@ bool FindCircleSubGraph(const Graph &graph,
}
std::vector<ir::Node *> TopologySortOperations(const Graph &graph) {
std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list =
BuildOperationAdjList(graph);
std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
adj_list = BuildOperationAdjList(graph);
PADDLE_ENFORCE(!HasCircleInternal(adj_list, nullptr));
std::unordered_set<ir::Node *> visited;
std::vector<ir::Node *> ret;
......@@ -117,34 +119,30 @@ std::vector<ir::Node *> TopologySortOperations(const Graph &graph) {
SortHelper(adj_list, adj.first, &visited, &ret);
}
}
return ret;
}
// Build operator inlink edge table.
std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
const Graph &graph) {
std::map<ir::Node *, std::unordered_set<ir::Node *>> adj_list;
std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
BuildOperationAdjList(const Graph &graph) {
std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
adj_list;
for (auto &n : graph.Nodes()) {
if (!n->IsOp()) continue;
if (adj_list.find(n) == adj_list.end()) {
adj_list[n] = std::unordered_set<ir::Node *>();
adj_list[n] = std::set<ir::Node *, ir::NodeComp>();
}
std::vector<ir::Node *> nodes;
for (auto &var : n->inputs) {
for (auto &adj_n : var->inputs) {
PADDLE_ENFORCE(adj_n->NodeType() == ir::Node::Type::kOperation);
VLOG(4) << "adj " << adj_n->Name() << reinterpret_cast<void *>(adj_n)
<< " -> " << n->Name() << reinterpret_cast<void *>(n)
<< " via " << var->Name() << reinterpret_cast<void *>(var);
nodes.push_back(adj_n);
adj_list[n].insert(adj_n);
}
}
std::sort(nodes.begin(), nodes.end(), [](ir::Node *node1, ir::Node *node2) {
return node1->id() > node2->id();
});
adj_list[n].insert(std::make_move_iterator(nodes.begin()),
std::make_move_iterator(nodes.end()));
}
return adj_list;
}
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <map>
#include <memory>
#include <set>
#include <vector>
#include "paddle/fluid/framework/ir/graph.h"
......@@ -25,6 +26,13 @@ namespace paddle {
namespace framework {
namespace ir {
// Compare nodes via node id.
struct NodeComp {
bool operator()(ir::Node *const &node1, ir::Node *const &node2) const {
return node1->id() < node2->id();
}
};
// Test if the graph contains circle.
bool HasCircle(const Graph &graph);
......@@ -57,8 +65,8 @@ std::vector<Node *> TopologyVarientSort(const Graph &graph, SortKind sort_kind);
void CleanIndividualNodes(Graph *graph);
// Build an adjacency list of operations for the `graph`.
std::map<ir::Node *, std::unordered_set<ir::Node *>> BuildOperationAdjList(
const Graph &graph);
std::map<ir::Node *, std::set<ir::Node *, ir::NodeComp>, ir::NodeComp>
BuildOperationAdjList(const Graph &graph);
template <typename T>
std::vector<T *> FilterByNodeWrapper(const Graph &graph) {
......
......@@ -241,6 +241,7 @@ OpDesc::OpDesc(const std::string &type, const VariableNameMap &inputs,
outputs_ = outputs;
attrs_ = attrs;
need_update_ = true;
block_ = nullptr;
}
OpDesc::OpDesc(const OpDesc &other, BlockDesc *block) {
......
......@@ -880,7 +880,16 @@ std::vector<KernelConfig>* OperatorWithKernel::GetKernelConfig(
void OperatorWithKernel::RunImpl(const Scope& scope,
const platform::Place& place) const {
if (!HasAttr(kEnableCacheRuntimeContext)) {
// To reduce the elapsed time of HasAttr, we use bool variable to record the
// result of HasAttr.
if (!enable_cache_runtime_context && HasAttr(kEnableCacheRuntimeContext))
enable_cache_runtime_context = true;
if (!enable_cache_expected_kernel && HasAttr(kEnableCacheExpectedKernel))
enable_cache_expected_kernel = true;
if (!all_kernels_must_compute_runtime_shape &&
HasAttr(kAllKernelsMustComputeRuntimeShape))
all_kernels_must_compute_runtime_shape = true;
if (!enable_cache_runtime_context) {
RuntimeContext ctx(Inputs(), Outputs(), scope);
RunImpl(scope, place, &ctx);
} else {
......@@ -899,7 +908,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
if (!HasAttr(kEnableCacheExpectedKernel) || !kernel_type_) {
if (!enable_cache_expected_kernel || !kernel_type_) {
ChooseKernel(*runtime_ctx, scope, place);
}
......@@ -918,7 +927,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx = pool.Get(kernel_type_->place_);
}
if (!HasAttr(kAllKernelsMustComputeRuntimeShape)) {
if (!all_kernels_must_compute_runtime_shape) {
RuntimeInferShapeContext infer_shape_ctx(*this, exec_scope, *runtime_ctx);
this->InferShape(&infer_shape_ctx);
}
......
......@@ -506,6 +506,9 @@ class OperatorWithKernel : public OperatorBase {
mutable std::unique_ptr<OpKernelFunc> kernel_func_;
mutable std::unique_ptr<RuntimeContext> runtime_ctx_;
mutable const Scope* pre_scope_ = nullptr;
mutable bool enable_cache_runtime_context = false;
mutable bool enable_cache_expected_kernel = false;
mutable bool all_kernels_must_compute_runtime_shape = false;
};
extern bool OpSupportGPU(const std::string& op_type);
......
......@@ -221,7 +221,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
PADDLE_ENFORCE(!member_->use_cuda_,
"gpu mode does not support async_mode_ now!");
graphs.push_back(graph);
for (int i = 1; i < places.size(); ++i) {
for (size_t i = 1; i < places.size(); ++i) {
auto *tmp_graph = new ir::Graph(graph->OriginProgram());
async_graphs_.emplace_back(tmp_graph);
graphs.push_back(tmp_graph);
......@@ -315,7 +315,7 @@ ParallelExecutor::ParallelExecutor(const std::vector<platform::Place> &places,
graph = build_strategy.Apply(graph, {member_->places_[0]}, loss_var_name,
{member_->local_scopes_[0]}, 1,
member_->use_cuda_, member_->nccl_ctxs_.get());
for (int i = 1; i < member_->places_.size(); ++i) {
for (size_t i = 1; i < member_->places_.size(); ++i) {
graphs[i] =
build_strategy.Apply(graphs[i], {member_->places_[i]}, loss_var_name,
{member_->local_scopes_[i]}, 1,
......
......@@ -76,7 +76,7 @@ message PullDenseWorkerParameter {
message TableParameter {
// dense table only
optional int64 table_id = 1;
optional uint64 table_id = 1;
repeated string dense_value_name = 2;
repeated string dense_grad_name = 3;
repeated int32 push_dense_wait_times = 5;
......
......@@ -259,6 +259,9 @@ bool AnalysisPredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false;
}
PADDLE_ENFORCE_NOT_NULL(input_ptr);
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......
......@@ -54,6 +54,7 @@ PaddleBuf &PaddleBuf::operator=(const PaddleBuf &other) {
memory_owned_ = other.memory_owned_;
} else {
Resize(other.length());
PADDLE_ENFORCE(!(other.length() > 0 && other.data() == nullptr));
memcpy(data_, other.data(), other.length());
length_ = other.length();
memory_owned_ = true;
......
......@@ -169,6 +169,7 @@ std::unique_ptr<PaddlePredictor> NativePaddlePredictor::Clone() {
std::unique_ptr<PaddlePredictor> cls(new NativePaddlePredictor(config_));
// Hot fix the bug that result diff in multi-thread.
// TODO(Superjomn) re-implement a real clone here.
PADDLE_ENFORCE_NOT_NULL(dynamic_cast<NativePaddlePredictor *>(cls.get()));
if (!dynamic_cast<NativePaddlePredictor *>(cls.get())->Init(nullptr)) {
LOG(ERROR) << "fail to call Init";
return nullptr;
......@@ -210,6 +211,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
return false;
}
PADDLE_ENFORCE_NOT_NULL(input_ptr);
PADDLE_ENFORCE_NOT_NULL(inputs[i].data.data());
if (platform::is_cpu_place(place_)) {
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std::memcpy(static_cast<void *>(input_ptr), inputs[i].data.data(),
......@@ -316,6 +319,8 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
}
std::unique_ptr<PaddlePredictor> predictor(new NativePaddlePredictor(config));
PADDLE_ENFORCE_NOT_NULL(
dynamic_cast<NativePaddlePredictor *>(predictor.get()));
if (!dynamic_cast<NativePaddlePredictor *>(predictor.get())->Init(nullptr)) {
return nullptr;
}
......
......@@ -123,8 +123,8 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// will enhance this pass later.
"runtime_context_cache_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
// "seqpool_concat_fuse_pass", //
// "embedding_fc_lstm_fuse_pass", //
"fc_lstm_fuse_pass", //
"mul_lstm_fuse_pass", //
......
......@@ -47,6 +47,7 @@ struct DataRecord {
num_lines++;
std::vector<std::string> data;
split(line, '\t', &data);
PADDLE_ENFORCE(data.size() >= 4);
// load title1 data
std::vector<int64_t> title1_data;
split_to_int64(data[0], ' ', &title1_data);
......
......@@ -150,6 +150,9 @@ void SetConfig(AnalysisConfig *cfg, bool use_mkldnn = false) {
if (use_mkldnn) {
cfg->EnableMKLDNN();
}
// Enable seqpool_concat_fuse_pass, disabled by default since it takes much
// time
cfg->pass_builder()->InsertPass(2, "seqpool_concat_fuse_pass");
}
void profile(bool use_mkldnn = false) {
......
......@@ -214,28 +214,23 @@ TEST(Analyzer_Transformer, fuse_statis) {
}
// Compare result of NativeConfig and AnalysisConfig
// void compare(bool use_mkldnn = false) {
// AnalysisConfig cfg;
// SetConfig(&cfg);
// if (use_mkldnn) {
// cfg.EnableMKLDNN();
// }
//
// std::vector<std::vector<PaddleTensor>> input_slots_all;
// SetInput(&input_slots_all);
// CompareNativeAndAnalysis(
// reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
// input_slots_all);
// }
// TODO(yihuaxu):
// Disable compare and compare_mkldnn temporary, see
// https://github.com/paddlePaddle/Paddle/issues/16316 for details.
// TEST(Analyzer_Transformer, compare) { compare(); }
// #ifdef PADDLE_WITH_MKLDNN
// TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */);
// }
// #endif
void compare(bool use_mkldnn = false) {
AnalysisConfig cfg;
SetConfig(&cfg);
if (use_mkldnn) {
cfg.EnableMKLDNN();
}
std::vector<std::vector<PaddleTensor>> input_slots_all;
SetInput(&input_slots_all);
CompareNativeAndAnalysis(
reinterpret_cast<const PaddlePredictor::Config *>(&cfg), input_slots_all);
}
TEST(Analyzer_Transformer, compare) { compare(); }
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_Transformer, compare_mkldnn) { compare(true /* use_mkldnn */); }
#endif
} // namespace inference
} // namespace paddle
abs
acos
asin
atan
attention_lstm
brelu
conv_shift
cos
cos_sim
dequantize
elu
fc
flatten
fsp
......@@ -21,14 +14,8 @@ fusion_seqconv_eltadd_relu
fusion_seqexpand_concat_fc
fusion_seqpool_concat
fusion_squared_mat_sub
gelu
gru
hard_shrink
hierarchical_sigmoid
leaky_relu
log
logsigmoid
lookup_table
lrn
lstm_unit
lstmp
......@@ -39,10 +26,11 @@ modified_huber_loss
nce
pool2d
pool3d
pow
prelu
quantize
rank_loss
reduce_all
reduce_any
reduce_max
reduce_mean
reduce_min
......@@ -51,26 +39,10 @@ reduce_sum
requantize
reshape
rnn_memory_helper
round
sequence_softmax
sin
softplus
softshrink
softsign
space_to_depth
spp
square
squared_l2_distance
squared_l2_norm
squeeze
stanh
swish
tanh_shrink
teacher_student_sigmoid_loss
tensor_array_to_tensor
thresholded_relu
transpose
tree_conv
unpool
unsqueeze
warpctc
......@@ -12,6 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/activation_op.h"
#include "paddle/fluid/platform/cudnn_desc.h"
......@@ -82,6 +85,8 @@ template <typename T>
struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_RELU) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
template <typename T>
......@@ -94,6 +99,8 @@ struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnRelu6GradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 6.0, CUDNN_ACTIVATION_CLIPPED_RELU) {
}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
template <typename T>
......@@ -105,6 +112,8 @@ template <typename T>
struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_SIGMOID) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
template <typename T>
......@@ -116,6 +125,8 @@ template <typename T>
struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, CUDNN_ACTIVATION_TANH) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; }
};
template <typename Functor>
......@@ -140,10 +151,13 @@ class CudnnActivationGradKernel
public:
using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override {
static_assert(Functor::FwdDeps() == kDepOut, "Forward deps must be Out.");
const framework::Tensor *X, *Out, *dOut;
X = Out = dOut = nullptr;
framework::Tensor* dX = nullptr;
ExtractActivationGradTensor(context, &X, &Out, &dOut, &dX);
ExtractActivationGradTensor<Functor::FwdDeps()>(context, &X, &Out, &dOut,
&dX);
dX->mutable_data<T>(context.GetPlace());
auto& dev_ctx = context.template device_context<CUDADeviceContext>();
Functor functor(dev_ctx);
......
......@@ -15,7 +15,9 @@ limitations under the License. */
#include "paddle/fluid/operators/activation_op.h"
#include <memory>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
#ifdef PADDLE_WITH_CUDA
......@@ -27,6 +29,25 @@ namespace operators {
using paddle::framework::Tensor;
template <typename GradFunctor>
static constexpr bool CanInplaceAct() {
return GradFunctor::FwdDeps() == kDepOut || GradFunctor::FwdDeps() == kNoDeps;
}
std::unique_ptr<std::unordered_set<std::string>> GetInplaceOpSet() {
std::unique_ptr<std::unordered_set<std::string>> ret(
new std::unordered_set<std::string>());
#define INSERT_INTO_INPLACE_OP_SET(op_type, __omitted, fwd_functor, \
bwd_functor) \
if (CanInplaceAct<bwd_functor<float>>()) { \
ret->insert(#op_type); \
}
FOR_EACH_ACTIVATION_OP(INSERT_INTO_INPLACE_OP_SET);
#undef INSERT_INTO_INPLACE_OP_SET
return ret;
}
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
class OP_NAME##OpMaker \
: public ::paddle::framework::OpProtoAndCheckerMaker { \
......@@ -50,26 +71,32 @@ using paddle::framework::Tensor;
} \
}
#define REGISTER_ACTIVATION_OP_GRAD_MAKER(OP_NAME, KERNEL_TYPE) \
class OP_NAME##GradMaker \
: public ::paddle::framework::SingleGradOpDescMaker { \
public: \
using ::paddle::framework::SingleGradOpDescMaker::SingleGradOpDescMaker; \
\
protected: \
std::unique_ptr<::paddle::framework::OpDesc> Apply() const override { \
auto* op = new ::paddle::framework::OpDesc(); \
op->SetType(#KERNEL_TYPE "_grad"); \
op->SetInput("Out", Output("Out")); \
op->SetInput(::paddle::framework::GradVarName("Out"), \
OutputGrad("Out")); \
\
op->SetAttrMap(Attrs()); \
\
op->SetOutput(::paddle::framework::GradVarName("X"), InputGrad("X")); \
return std::unique_ptr<::paddle::framework::OpDesc>(op); \
} \
template <ActBwdOpFwdDeps kDepValue>
class ActivationGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType(ForwardOpType() + "_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
op->SetInput("X", Input("X"));
}
if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
op->SetInput("Out", Output("Out"));
}
return op;
}
};
framework::OpKernelType GetKernelType(const framework::ExecutionContext& ctx,
const framework::OperatorWithKernel& oper,
......@@ -129,14 +156,15 @@ class ActivationOpGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
ctx->ShareDim("Out", framework::GradVarName("X"));
ctx->ShareLoD("Out", framework::GradVarName("X"));
auto out_grad_name = framework::GradVarName("Out");
ctx->ShareDim(out_grad_name, framework::GradVarName("X"));
ctx->ShareLoD(out_grad_name, framework::GradVarName("X"));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return GetKernelType(ctx, *this, "Out");
return GetKernelType(ctx, *this, framework::GradVarName("Out"));
}
};
......@@ -558,79 +586,27 @@ REGISTER_ACTIVATION_OP_MAKER(Log, LogDoc);
REGISTER_ACTIVATION_OP_MAKER(Square, SquareDoc);
REGISTER_ACTIVATION_OP_MAKER(Softplus, SoftplusDoc);
REGISTER_ACTIVATION_OP_MAKER(Softsign, SoftsignDoc);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Sigmoid, sigmoid);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu, relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Gelu, gelu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Exp, exp);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Tanh, tanh);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Ceil, ceil);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Floor, floor);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Sqrt, sqrt);
REGISTER_ACTIVATION_OP_GRAD_MAKER(SoftRelu, soft_relu);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Relu6, relu6);
REGISTER_ACTIVATION_OP_GRAD_MAKER(Reciprocal, reciprocal);
REGISTER_ACTIVATION_OP_GRAD_MAKER(HardSigmoid, hard_sigmoid);
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
#define FOR_EACH_INPLACE_OP_FUNCTOR(__macro) \
__macro(Sigmoid, sigmoid); \
__macro(Relu, relu); \
__macro(Exp, exp); \
__macro(Tanh, tanh); \
__macro(Ceil, ceil); \
__macro(Floor, floor); \
__macro(Sqrt, sqrt); \
__macro(SoftRelu, soft_relu); \
__macro(Relu6, relu6); \
__macro(Reciprocal, reciprocal); \
__macro(HardSigmoid, hard_sigmoid);
#define FOR_EACH_OP_FUNCTOR(__macro) \
__macro(LogSigmoid, logsigmoid); \
__macro(SoftShrink, softshrink); \
__macro(Abs, abs); \
__macro(Cos, cos); \
__macro(Acos, acos); \
__macro(Sin, sin); \
__macro(Asin, asin); \
__macro(Atan, atan); \
__macro(Round, round); \
__macro(Log, log); \
__macro(Square, square); \
__macro(Gelu, gelu); \
__macro(BRelu, brelu); \
__macro(Pow, pow); \
__macro(STanh, stanh); \
__macro(Softplus, softplus); \
__macro(Softsign, softsign); \
__macro(LeakyRelu, leaky_relu); \
__macro(TanhShrink, tanh_shrink); \
__macro(ELU, elu); \
__macro(HardShrink, hard_shrink); \
__macro(Swish, swish); \
__macro(ThresholdedRelu, thresholded_relu);
#define REGISTER_INPLACE_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::operators::OP_NAME##GradMaker, \
::paddle::framework::SingleOpInplaceInToOut); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad, \
::paddle::framework::SingleOpInplaceInToOut)
#define REGISTER_ACTIVATION_OP(OP_NAME, KERNEL_TYPE) \
REGISTER_OPERATOR(KERNEL_TYPE, ::paddle::operators::ActivationOp, \
::paddle::operators::OP_NAME##OpMaker, \
::paddle::operators::ActivationOpInferVarType, \
::paddle::framework::DefaultGradOpDescMaker<true>); \
REGISTER_OPERATOR(KERNEL_TYPE##_grad, ::paddle::operators::ActivationOpGrad)
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, functor, grad_functor) \
#define REGISTER_ACTIVATION_OP(KERNEL_TYPE, OP_NAME, functor, grad_functor) \
REGISTER_OPERATOR( \
KERNEL_TYPE, ops::ActivationOp, ops::OP_NAME##OpMaker, \
ops::ActivationOpInferVarType, \
ops::ActivationGradOpDescMaker<ops::grad_functor<float>::FwdDeps()>, \
std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(), \
::paddle::framework::SingleOpInplaceInToOut, \
void>::type); \
REGISTER_OPERATOR( \
KERNEL_TYPE##_grad, ops::ActivationOpGrad, \
std::conditional<ops::CanInplaceAct<ops::grad_functor<float>>(), \
::paddle::framework::SingleOpInplaceInToOut, \
void>::type)
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name, functor, \
grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, ops::ActivationKernel<paddle::platform::CPUDeviceContext, \
ops::functor<float>>, \
......@@ -643,6 +619,5 @@ namespace ops = paddle::operators;
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext, \
ops::grad_functor<double>>);
FOR_EACH_OP_FUNCTOR(REGISTER_ACTIVATION_OP);
FOR_EACH_INPLACE_OP_FUNCTOR(REGISTER_INPLACE_ACTIVATION_OP);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CPU_KERNEL);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
......@@ -15,7 +15,8 @@ limitations under the License. */
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, functor, grad_functor) \
#define REGISTER_ACTIVATION_CUDA_KERNEL(act_type, op_name, functor, \
grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
act_type, \
ops::ActivationKernel<plat::CUDADeviceContext, ops::functor<float>>, \
......@@ -30,4 +31,4 @@ namespace plat = paddle::platform;
ops::ActivationGradKernel<plat::CUDADeviceContext, \
ops::grad_functor<plat::float16>>);
FOR_EACH_KERNEL_FUNCTOR(REGISTER_ACTIVATION_CUDA_KERNEL);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CUDA_KERNEL);
......@@ -24,6 +24,7 @@
**/
#include "paddle/fluid/operators/detection/gpc.h"
#include "paddle/fluid/platform/enforce.h"
namespace gpc {
......@@ -689,6 +690,7 @@ static bbox *create_contour_bboxes(gpc_polygon *p) {
gpc_malloc<bbox>(box, p->num_contours * sizeof(bbox),
const_cast<char *>("Bounding box creation"));
PADDLE_ENFORCE_NOT_NULL(box);
/* Construct contour bounding boxes */
for (c = 0; c < p->num_contours; c++) {
......@@ -852,6 +854,7 @@ void gpc_add_contour(gpc_polygon *p, gpc_vertex_list *new_contour, int hole) {
/* Create an extended hole array */
gpc_malloc<int>(extended_hole, (p->num_contours + 1) * sizeof(int),
const_cast<char *>("contour hole addition"));
PADDLE_ENFORCE_NOT_NULL(extended_hole);
/* Create an extended contour array */
gpc_malloc<gpc_vertex_list>(extended_contour,
......@@ -969,6 +972,7 @@ void gpc_polygon_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
/* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation"));
PADDLE_ENFORCE_NOT_NULL(sbt);
build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0;
free_sbtree(&sbtree);
......@@ -1604,6 +1608,7 @@ void gpc_tristrip_clip(gpc_op op, gpc_polygon *subj, gpc_polygon *clip,
/* Build scanbeam table from scanbeam tree */
gpc_malloc<double>(sbt, sbt_entries * sizeof(double),
const_cast<char *>("sbt creation"));
PADDLE_ENFORCE_NOT_NULL(sbt);
build_sbt(&scanbeam, sbt, sbtree);
scanbeam = 0;
free_sbtree(&sbtree);
......
......@@ -16,6 +16,7 @@ limitations under the License. */
#include <nccl.h>
#endif
#include <limits>
#include <memory>
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
......@@ -104,8 +105,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber,
payload->memory_size());
if (payload->memory_size() >= std::numeric_limits<int>::max()) {
LOG(FATAL) << "AppendZeroCopy varname:" << name
<< ", vlen:" << payload->memory_size();
LOG(FATAL) << "FATAL error: varname:" << name
<< ", vlen:" << payload->memory_size()
<< " >= std::numeric_limits<int>::max():"
<< std::numeric_limits<int>::max() << ", so exit!";
}
// steal reference of tensor data
::grpc::Slice slices[4]; // metadata, tensor, rows meta, rows
......
......@@ -37,10 +37,19 @@ class InterpolateOp : public framework::OperatorWithKernel {
"Interpolation method can only be \"bilinear\" or \"nearest\".");
auto dim_x = ctx->GetInputDim("X"); // NCHW format
int out_h = ctx->Attrs().Get<int>("out_h");
int out_w = ctx->Attrs().Get<int>("out_w");
PADDLE_ENFORCE_EQ(dim_x.size(), 4, "X's dimension must be 4");
int out_h, out_w;
float scale = ctx->Attrs().Get<float>("scale");
if (scale > 0) {
// round down
out_h = static_cast<int>(dim_x[2] * scale);
out_w = static_cast<int>(dim_x[3] * scale);
} else {
out_h = ctx->Attrs().Get<int>("out_h");
out_w = ctx->Attrs().Get<int>("out_w");
}
if (ctx->HasInput("OutSize") && ctx->IsRuntime()) {
auto out_size_dim = ctx->GetInputDim("OutSize");
PADDLE_ENFORCE_EQ(out_size_dim.size(), 1,
......@@ -77,6 +86,7 @@ class InterpolateOpMaker : public framework::OpProtoAndCheckerMaker {
AddAttr<int>("out_h", "output height of interpolate op.");
AddAttr<int>("out_w", "output width of interpolate op.");
AddAttr<float>("scale", "scale factor of interpolate op.").SetDefault(0.);
AddAttr<std::string>("interp_method",
"(string, default \"bilinear\"), interpolation "
"method, can be \"bilinear\" for "
......
......@@ -192,9 +192,21 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
auto* output = ctx.Output<Tensor>("Out");
auto* input_data = input->data<T>();
int n = input->dims()[0];
int c = input->dims()[1];
int in_h = input->dims()[2];
int in_w = input->dims()[3];
auto interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = in_h * scale;
out_w = in_w * scale;
}
auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) {
Tensor sizes;
......@@ -207,11 +219,6 @@ class InterpolateOpCUDAKernel : public framework::OpKernel<T> {
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
int n = input->dims()[0];
int c = input->dims()[1];
int in_h = input->dims()[2];
int in_w = input->dims()[3];
auto* output_data =
output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());
......@@ -268,14 +275,20 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
math::SetConstant<platform::CUDADeviceContext, T> zero;
zero(device_ctx, input_grad, static_cast<T>(0.0));
int n = input_grad->dims()[0];
int c = input_grad->dims()[1];
int in_h = input_grad->dims()[2];
int in_w = input_grad->dims()[3];
auto interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = in_h * scale;
out_w - in_w* scale;
}
auto out_size = ctx.Input<Tensor>("OutSize");
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
if (out_size != nullptr) {
Tensor sizes;
framework::TensorCopy(*out_size, platform::CPUPlace(), &sizes);
......@@ -284,10 +297,8 @@ class InterpolateGradOpCUDAKernel : public framework::OpKernel<T> {
out_w = size_data[1];
}
int n = input_grad->dims()[0];
int c = input_grad->dims()[1];
int in_h = input_grad->dims()[2];
int in_w = input_grad->dims()[3];
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
int in_hw = in_h * in_w;
int out_hw = out_h * out_w;
......
......@@ -163,9 +163,21 @@ class InterpolateKernel : public framework::OpKernel<T> {
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
std::string interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = static_cast<int>(in_h * scale);
out_w = static_cast<int>(in_w * scale);
}
auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) {
auto out_size_data = out_size->data<int>();
......@@ -175,11 +187,6 @@ class InterpolateKernel : public framework::OpKernel<T> {
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
output->mutable_data<T>({n, c, out_h, out_w}, ctx.GetPlace());
auto& device_ctx =
ctx.template device_context<platform::CPUDeviceContext>();
......@@ -221,23 +228,31 @@ class InterpolateGradKernel : public framework::OpKernel<T> {
auto* input_grad = ctx.Output<Tensor>(framework::GradVarName("X"));
auto* output_grad = ctx.Input<Tensor>(framework::GradVarName("Out"));
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
std::string interp_method = ctx.Attr<std::string>("interp_method");
int out_h = ctx.Attr<int>("out_h");
int out_w = ctx.Attr<int>("out_w");
float scale = ctx.Attr<float>("scale");
if (scale > 0) {
out_h = static_cast<int>(in_h * scale);
out_w = static_cast<int>(in_w * scale);
}
auto out_size = ctx.Input<Tensor>("OutSize");
if (out_size != nullptr) {
auto out_size_data = out_size->data<int>();
out_h = out_size_data[0];
out_w = out_size_data[1];
}
bool align_corners = ctx.Attr<bool>("align_corners");
int align_mode = ctx.Attr<int>("align_mode");
const int n = input->dims()[0];
const int c = input->dims()[1];
const int in_h = input->dims()[2];
const int in_w = input->dims()[3];
input_grad->mutable_data<T>({n, c, in_h, in_w}, ctx.GetPlace());
auto& device_ctx =
ctx.template device_context<platform::CPUDeviceContext>();
......
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/linspace_op.h"
namespace paddle {
namespace operators {
class LinspaceOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Start"),
"Input(Start) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Stop"),
"Input(Stop) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Num"),
"Input(Num) of LinspaceOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(OUt) of LinspaceOp should not be null.");
auto s_dims = ctx->GetInputDim("Start");
PADDLE_ENFORCE((s_dims.size() == 1) && (s_dims[0] == 1),
"The shape of Input(Start) should be [1].");
auto e_dims = ctx->GetInputDim("Stop");
PADDLE_ENFORCE((e_dims.size() == 1) && (e_dims[0] == 1),
"The shape of Input(Stop) should be [1].");
auto step_dims = ctx->GetInputDim("Num");
PADDLE_ENFORCE((step_dims.size() == 1) && (step_dims[0] == 1),
"The shape of Input(Num) should be [1].");
ctx->SetOutputDim("Out", {-1});
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
framework::LibraryType library_{framework::LibraryType::kPlain};
framework::DataLayout layout_ = framework::DataLayout::kAnyLayout;
return framework::OpKernelType(
ctx.Input<framework::Tensor>("Start")->type(), ctx.device_context(),
layout_, library_);
}
};
class LinspaceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput("Start",
"First entry in the sequence. It is a tensor of shape [1], should "
"be of type float32 or float64.");
AddInput("Stop",
"Last entry in the sequence. It is a tensor of shape [1], should "
"be of type float32 or float64.");
AddInput("Num",
"Number of entry in the sequence. It is a tensor of shape [1], "
"should be of type int32.");
AddOutput("Out", "A sequence of numbers.");
AddComment(R"DOC(
Return fixed number of evenly spaced values within a given interval. First entry is start, and last entry is stop. In the case when Num is 1, only Start is returned. Like linspace function of numpy.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_WITHOUT_GRADIENT(linspace, ops::LinspaceOp, ops::LinspaceOpMaker);
REGISTER_OP_CPU_KERNEL(linspace, ops::CPULinspaceKernel<float>,
ops::CPULinspaceKernel<double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/linspace_op.h"
#include "paddle/fluid/platform/cuda_primitives.h"
namespace paddle {
namespace operators {
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
i += blockDim.x * gridDim.x)
template <typename T>
__global__ void LinspaceKernel(T start, T step, int64_t size, T* out) {
CUDA_1D_KERNEL_LOOP(index, size) { out[index] = start + step * index; }
}
template <typename T>
__global__ void LinspaceSpecialKernel(T start, T* out) {
out[0] = start;
}
template <typename T>
class CUDALinspaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
auto* start_t = context.Input<framework::Tensor>("Start");
auto* stop_t = context.Input<framework::Tensor>("Stop");
auto* num_t = context.Input<framework::Tensor>("Num");
auto* out = context.Output<framework::Tensor>("Out");
framework::Tensor n;
framework::TensorCopy(*start_t, platform::CPUPlace(), &n);
T start = n.data<T>()[0];
framework::TensorCopy(*stop_t, platform::CPUPlace(), &n);
T stop = n.data<T>()[0];
framework::TensorCopy(*num_t, platform::CPUPlace(), &n);
int32_t num = n.data<int32_t>()[0];
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
T step = 0;
if (num != 1) {
step = (stop - start) / (num - 1);
}
auto stream = context.cuda_device_context().stream();
int block = 512;
int grid = (num + block - 1) / block;
LinspaceKernel<T><<<grid, block, 0, stream>>>(start, step, num, out_data);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(linspace, ops::CUDALinspaceKernel<float>,
ops::CUDALinspaceKernel<double>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {
template <typename T>
class CPULinspaceKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
T start = context.Input<framework::Tensor>("Start")->data<T>()[0];
T stop = context.Input<framework::Tensor>("Stop")->data<T>()[0];
int32_t num = context.Input<framework::Tensor>("Num")->data<int32_t>()[0];
auto* out = context.Output<framework::Tensor>("Out");
PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0.");
out->Resize(framework::make_ddim({num}));
T* out_data = out->mutable_data<T>(context.GetPlace());
if (num > 1) {
T step = (stop - start) / (num - 1);
T value = start;
for (int i = 0; i < num; ++i) {
out_data[i] = value;
value += step;
}
} else {
out_data[0] = start;
}
}
};
} // namespace operators
} // namespace paddle
......@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lookup_table_op.h"
#include <memory>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
#include "paddle/fluid/framework/var_type_inference.h"
namespace paddle {
......@@ -119,6 +123,29 @@ or not. And the output only shares the LoD information with input Ids.
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(LookupTableGradOpNoBuffer, "W");
class LookupTableGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("lookup_table_grad");
op->SetInput("W", Input("W"));
op->SetInput("Ids", Input("Ids"));
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetOutput(framework::GradVarName("W"), InputGrad("W"));
op->SetAttrMap(Attrs());
return op;
}
};
class LookupTableOpGrad : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -131,7 +158,8 @@ class LookupTableOpGrad : public framework::OperatorWithKernel {
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = framework::GetDataTypeOfVar(ctx.InputVar("Out"));
auto data_type = framework::GetDataTypeOfVar(
ctx.InputVar(framework::GradVarName("Out")));
return framework::OpKernelType(data_type, ctx.device_context());
}
};
......@@ -159,10 +187,11 @@ class LookupTableOpGradVarTypeInference : public framework::VarTypeInference {
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(lookup_table, ops::LookupTableOp,
paddle::framework::DefaultGradOpDescMaker<true>,
ops::LookupTableOpMaker);
REGISTER_OPERATOR(lookup_table, ops::LookupTableOp, ops::LookupTableOpMaker,
ops::LookupTableGradOpDescMaker);
REGISTER_OPERATOR(lookup_table_grad, ops::LookupTableOpGrad,
ops::LookupTableGradOpNoBuffer,
ops::LookupTableOpGradVarTypeInference);
REGISTER_OP_CPU_KERNEL(lookup_table, ops::LookupTableKernel<float>,
......
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h"
REGISTER_REDUCE_OP(reduce_all);
REGISTER_OP_CPU_KERNEL(reduce_all,
ops::ReduceKernel<paddle::platform::CPUDeviceContext,
bool, ops::AllFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_all_op.h"
REGISTER_OP_CUDA_KERNEL(reduce_all,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
bool, ops::AllFunctor>);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
namespace paddle {
namespace operators {
struct AllFunctor {
template <typename DeviceContext, typename X, typename Y, typename Dim>
void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) {
y->device(place) = x->all(dim);
}
};
} // namespace operators
} // namespace paddle
// Copyright (c) 2018 PaddlePaddle Authors. Any Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h"
REGISTER_REDUCE_OP(reduce_any);
REGISTER_OP_CPU_KERNEL(reduce_any,
ops::ReduceKernel<paddle::platform::CPUDeviceContext,
bool, ops::AnyFunctor>);
// Copyright (c) 2018 PaddlePaddle Authors. Any Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/reduce_ops/reduce_any_op.h"
REGISTER_OP_CUDA_KERNEL(reduce_any,
ops::ReduceKernel<paddle::platform::CUDADeviceContext,
bool, ops::AnyFunctor>);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/operators/reduce_ops/reduce_op.h"
namespace paddle {
namespace operators {
struct AnyFunctor {
template <typename DeviceContext, typename X, typename Y, typename Dim>
void operator()(const DeviceContext& place, X* x, Y* y, const Dim& dim) {
y->device(place) = x->any(dim);
}
};
} // namespace operators
} // namespace paddle
......@@ -13,12 +13,18 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/space_to_depth_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
class SpaceToDepthOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -100,6 +106,28 @@ class SpaceToDepthOpMaker : public framework::OpProtoAndCheckerMaker {
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(SpaceToDepthGradOpNoBuffer, "X");
class SpaceToDepthGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("space_to_depth_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("X", Input("X"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class SpaceToDepthGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -110,6 +138,14 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel {
"Input(Out@GRAD) shouldn't be null.");
ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(
ctx.Input<Tensor>(framework::GradVarName("Out"))->type(),
ctx.GetPlace());
}
};
} // namespace operators
} // namespace paddle
......@@ -117,8 +153,9 @@ class SpaceToDepthGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators;
REGISTER_OPERATOR(space_to_depth, ops::SpaceToDepthOp, ops::SpaceToDepthOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(space_to_depth_grad, ops::SpaceToDepthGradOp);
ops::SpaceToDepthGradOpDescMaker);
REGISTER_OPERATOR(space_to_depth_grad, ops::SpaceToDepthGradOp,
ops::SpaceToDepthGradOpNoBuffer);
REGISTER_OP_CPU_KERNEL(
space_to_depth,
ops::SpaceToDepthKernel<paddle::platform::CPUDeviceContext, float>,
......
......@@ -14,6 +14,10 @@ limitations under the License. */
#include "paddle/fluid/operators/squared_l2_distance_op.h"
#include <memory>
#include "paddle/fluid/framework/no_need_buffer_vars_inference.h"
namespace paddle {
namespace operators {
......@@ -54,6 +58,34 @@ class SquaredL2DistanceOp : public framework::OperatorWithKernel {
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE(SquaredL2DistanceGradOpNoBuffer, "X",
"Y");
class SquaredL2DistanceGradOpDescMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("squared_l2_distance_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("sub_result", Output("sub_result"));
op->SetInput("X", Input("X"));
op->SetInput("Y", Input("Y"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetOutput(framework::GradVarName("Y"), InputGrad("Y"));
op->SetAttrMap(Attrs());
return op;
}
};
class SquaredL2DistanceOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
......@@ -88,6 +120,7 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
"Gradient of Out should not be null");
PADDLE_ENFORCE(ctx->HasInput("sub_result"), "SubResult should not be null");
auto out_dims = ctx->GetInputDim(framework::GradVarName("Out"));
auto x_dims = ctx->GetInputDim("X");
auto y_dims = ctx->GetInputDim("Y");
......@@ -102,6 +135,13 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
if (ctx->HasOutput(x_grad_name)) ctx->SetOutputDim(x_grad_name, x_dims);
if (ctx->HasOutput(y_grad_name)) ctx->SetOutputDim(y_grad_name, y_dims);
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
return framework::OpKernelType(ctx.Input<Tensor>("sub_result")->type(),
ctx.GetPlace());
}
};
} // namespace operators
......@@ -110,8 +150,9 @@ class SquaredL2DistanceGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators;
REGISTER_OPERATOR(squared_l2_distance, ops::SquaredL2DistanceOp,
ops::SquaredL2DistanceOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp);
ops::SquaredL2DistanceGradOpDescMaker);
REGISTER_OPERATOR(squared_l2_distance_grad, ops::SquaredL2DistanceGradOp,
ops::SquaredL2DistanceGradOpNoBuffer);
REGISTER_OP_CPU_KERNEL(
squared_l2_distance,
ops::SquaredL2DistanceKernel<paddle::platform::CPUDeviceContext, float>);
......
......@@ -77,6 +77,9 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
auto* x_g = context.Output<Tensor>(framework::GradVarName("X"));
auto* y_g = context.Output<Tensor>(framework::GradVarName("Y"));
PADDLE_ENFORCE_NOT_NULL(x_g);
PADDLE_ENFORCE_NOT_NULL(y_g);
auto sub_result = EigenMatrix<T>::From(*in0);
auto out_grad = EigenMatrix<T>::From(*in1);
......@@ -92,31 +95,28 @@ class SquaredL2DistanceGradKernel : public framework::OpKernel<T> {
// propagate back to input
auto& eigen_place =
*context.template device_context<DeviceContext>().eigen_device();
if (x_g) {
x_g->mutable_data<T>(context.GetPlace());
// eigen matrix
auto x_grad =
EigenMatrix<T>::From(*x_g, framework::make_ddim({x_dims[0], cols}));
// dimensions are same with subResult
x_grad.device(eigen_place) = grad_mat;
}
if (y_g) {
y_g->mutable_data<T>(context.GetPlace());
PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0],
"First dimension of gradient must be greater or "
"equal than first dimension of target.");
if (sub_result.dimensions()[0] == y_dims[0]) {
auto y_grad =
EigenMatrix<T>::From(*y_g, framework::make_ddim({y_dims[0], cols}));
y_grad.device(eigen_place) = -1 * grad_mat;
} else {
auto col_sum_res = -1 * (grad_mat.sum(Eigen::array<int, 1>({{0}})));
auto y_grad = EigenVector<T>::Flatten(*y_g);
y_grad.device(eigen_place) = col_sum_res;
}
x_g->mutable_data<T>(context.GetPlace());
// eigen matrix
auto x_grad =
EigenMatrix<T>::From(*x_g, framework::make_ddim({x_dims[0], cols}));
// dimensions are same with subResult
x_grad.device(eigen_place) = grad_mat;
y_g->mutable_data<T>(context.GetPlace());
PADDLE_ENFORCE_GE(sub_result.dimensions()[0], y_dims[0],
"First dimension of gradient must be greater or "
"equal than first dimension of target.");
if (sub_result.dimensions()[0] == y_dims[0]) {
auto y_grad =
EigenMatrix<T>::From(*y_g, framework::make_ddim({y_dims[0], cols}));
y_grad.device(eigen_place) = -1 * grad_mat;
} else {
auto col_sum_res = -1 * (grad_mat.sum(Eigen::array<int, 1>({{0}})));
auto y_grad = EigenVector<T>::Flatten(*y_g);
y_grad.device(eigen_place) = col_sum_res;
}
}
};
......
......@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/squared_l2_norm_op.h"
#include <memory>
namespace paddle {
namespace operators {
......@@ -31,6 +33,26 @@ class SquaredL2NormOp : public framework::OperatorWithKernel {
}
};
class SquaredL2NormGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("squared_l2_norm_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("X", Input("X"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class SquaredL2NormGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -67,8 +89,7 @@ $$Out = \sum_{i} X_{i}^2$$
namespace ops = paddle::operators;
REGISTER_OPERATOR(squared_l2_norm, ops::SquaredL2NormOp,
ops::SquaredL2NormOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::SquaredL2NormOpMaker, ops::SquaredL2NormGradOpDescMaker);
REGISTER_OPERATOR(squared_l2_norm_grad, ops::SquaredL2NormGradOp);
REGISTER_OP_CPU_KERNEL(
squared_l2_norm,
......
......@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/teacher_student_sigmoid_loss_op.h"
#include <memory>
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
......@@ -55,6 +58,28 @@ class TeacherStudentSigmoidLossOp : public framework::OperatorWithKernel {
}
};
class TeacherStudentSigmoidLossGradOpDescMaker
: public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("teacher_student_sigmoid_loss_grad");
op->SetInput("X", Input("X"));
op->SetInput("Label", Input("Label"));
op->SetInput(framework::GradVarName("Y"), OutputGrad("Y"));
op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
op->SetAttrMap(Attrs());
return op;
}
};
class TeacherStudentSigmoidLossGradientOp
: public framework::OperatorWithKernel {
public:
......@@ -148,7 +173,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(teacher_student_sigmoid_loss,
ops::TeacherStudentSigmoidLossOp,
ops::TeacherStudentSigmoidLossOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::TeacherStudentSigmoidLossGradOpDescMaker);
REGISTER_OPERATOR(teacher_student_sigmoid_loss_grad,
ops::TeacherStudentSigmoidLossGradientOp);
......
......@@ -13,6 +13,8 @@
// limitations under the License.
#include "paddle/fluid/operators/tree_conv_op.h"
#include <memory>
#include <string>
namespace paddle {
......@@ -86,6 +88,30 @@ class TreeConvOp : public framework::OperatorWithKernel {
}
};
class TreeConvGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("tree_conv_grad");
op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
op->SetInput("Filter", Input("Filter"));
op->SetInput("EdgeSet", Input("EdgeSet"));
op->SetInput("NodesVector", Input("NodesVector"));
op->SetOutput(framework::GradVarName("NodesVector"),
InputGrad("NodesVector"));
op->SetOutput(framework::GradVarName("Filter"), InputGrad("Filter"));
op->SetAttrMap(Attrs());
return op;
}
};
class TreeConvGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -115,7 +141,7 @@ class TreeConvGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators;
REGISTER_OPERATOR(tree_conv, ops::TreeConvOp, ops::TreeConvOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::TreeConvGradOpDescMaker);
REGISTER_OPERATOR(tree_conv_grad, ops::TreeConvGradOp);
......
......@@ -14,6 +14,8 @@ limitations under the License. */
#include "paddle/fluid/operators/warpctc_op.h"
#include <memory>
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/cudnn_helper.h"
#endif
......@@ -118,6 +120,27 @@ http://machinelearning.wustl.edu/mlpapers/paper_files/icml2006_GravesFGS06.pdf).
}
};
class WarpCTCGradOpDescMaker : public framework::SingleGradOpDescMaker {
public:
using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
protected:
std::unique_ptr<framework::OpDesc> Apply() const override {
std::unique_ptr<framework::OpDesc> op(new framework::OpDesc());
op->SetType("warpctc_grad");
op->SetInput("WarpCTCGrad", Output("WarpCTCGrad"));
op->SetInput("Logits", Input("Logits"));
op->SetInput(framework::GradVarName("Loss"), OutputGrad("Loss"));
op->SetOutput(framework::GradVarName("Logits"), InputGrad("Logits"));
op->SetAttrMap(Attrs());
return op;
}
};
class WarpCTCGradOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
......@@ -145,7 +168,7 @@ class WarpCTCGradOp : public framework::OperatorWithKernel {
namespace ops = paddle::operators;
REGISTER_OPERATOR(warpctc, ops::WarpCTCOp, ops::WarpCTCOpMaker,
paddle::framework::DefaultGradOpDescMaker<true>);
ops::WarpCTCGradOpDescMaker);
REGISTER_OPERATOR(warpctc_grad, ops::WarpCTCGradOp);
REGISTER_OP_CPU_KERNEL(
warpctc, ops::WarpCTCKernel<paddle::platform::CPUDeviceContext, float>);
......
......@@ -1366,6 +1366,10 @@ All parameter, weight, gradient are variables in Paddle.
"cache_runtime_context",
[](const BuildStrategy &self) { return self.cache_runtime_context_; },
[](BuildStrategy &self, bool b) { self.cache_runtime_context_ = b; })
.def_property(
"cache_expected_kernel",
[](const BuildStrategy &self) { return self.cache_expected_kernel_; },
[](BuildStrategy &self, bool b) { self.cache_expected_kernel_ = b; })
.def("_finalize_strategy_and_create_passes",
[](BuildStrategy &self) -> std::shared_ptr<ir::PassBuilder> {
return self.CreatePassesFromStrategy(true);
......
......@@ -202,6 +202,7 @@ function cmake_gen() {
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
-DWITH_CONTRIB=${WITH_CONTRIB:-ON}
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON}
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF}
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR}
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
-DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}
......@@ -234,6 +235,7 @@ EOF
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
-DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
-DWITH_INFERENCE_API_TEST=${WITH_INFERENCE_API_TEST:-ON} \
-DWITH_HIGH_LEVEL_API_TEST=${WITH_HIGH_LEVEL_API_TEST:-OFF} \
-DINFERENCE_DEMO_INSTALL_DIR=${INFERENCE_DEMO_INSTALL_DIR} \
-DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
-DANAKIN_BUILD_FAT_BIN=${ANAKIN_BUILD_FAT_BIN:OFF}\
......@@ -291,8 +293,12 @@ function build() {
Building in /paddle/build ...
============================================
EOF
parallel_number=`nproc`
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
make clean
make -j `nproc`
make -j ${parallel_number}
make install -j `nproc`
}
......@@ -737,9 +743,13 @@ function gen_fluid_lib() {
Generating fluid library for train and inference ...
========================================
EOF
parallel_number=`nproc`
if [[ "$1" != "" ]]; then
parallel_number=$1
fi
cmake .. -DWITH_DISTRIBUTE=OFF -DON_INFER=ON
make -j `nproc` fluid_lib_dist
make -j `nproc` inference_lib_dist
make -j ${parallel_number} fluid_lib_dist
make -j ${parallel_number} inference_lib_dist
}
function tar_fluid_lib() {
......@@ -770,11 +780,22 @@ EOF
function main() {
local CMD=$1
local parallel_number=$2
init
case $CMD in
build_only)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
;;
build_and_check)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""}
assert_api_spec_approvals
;;
build)
cmake_gen ${PYTHON_ABI:-""}
build
build ${parallel_number}
gen_dockerfile ${PYTHON_ABI:-""}
;;
test)
......@@ -797,7 +818,7 @@ function main() {
;;
fluid_inference_lib)
cmake_gen ${PYTHON_ABI:-""}
gen_fluid_lib
gen_fluid_lib ${parallel_number}
tar_fluid_lib
test_fluid_lib
;;
......@@ -806,16 +827,16 @@ function main() {
;;
cicheck)
cmake_gen ${PYTHON_ABI:-""}
build
build ${parallel_number}
assert_api_not_changed ${PYTHON_ABI:-""}
run_test
gen_fluid_lib
gen_fluid_lib ${parallel_number}
test_fluid_lib
assert_api_spec_approvals
;;
cicheck_brpc)
cmake_gen ${PYTHON_ABI:-""}
build
build ${parallel_number}
run_brpc_test
;;
assert_api)
......@@ -823,7 +844,7 @@ function main() {
assert_api_spec_approvals
;;
test_inference)
gen_fluid_lib
gen_fluid_lib ${parallel_number}
test_fluid_lib
;;
assert_api_approvals)
......@@ -840,7 +861,7 @@ function main() {
;;
cicheck_py35)
cmake_gen ${PYTHON_ABI:-""}
build
build ${parallel_number}
run_test
assert_api_not_changed ${PYTHON_ABI:-""}
;;
......@@ -848,7 +869,7 @@ function main() {
cmake_gen ${PYTHON_ABI:-""}
;;
gen_fluid_lib)
gen_fluid_lib
gen_fluid_lib ${parallel_number}
;;
test_fluid_lib)
test_fluid_lib
......
......@@ -66,6 +66,8 @@ from . import compiler
from .compiler import *
from paddle.fluid.layers.math_op_patch import monkey_patch_variable
from . import install_check
from .dygraph.nn import *
from .dygraph.layers import *
Tensor = LoDTensor
......
......@@ -136,6 +136,7 @@ class DatasetBase(object):
slot_var.name = var.name
if var.lod_level == 0:
slot_var.is_dense = True
slot_var.shape.extend(var.shape)
if var.dtype == core.VarDesc.VarType.FP32:
slot_var.type = "float"
elif var.dtype == core.VarDesc.VarType.INT64:
......
......@@ -22,7 +22,7 @@ __all__ = ['enabled', 'guard', 'to_variable']
def enabled():
return framework._in_dygraph_mode()
return framework.in_dygraph_mode()
@signature_safe_contextmanager
......
......@@ -97,20 +97,12 @@ def load_persistables(vardict, dirname, filename=None):
Examples:
.. code-block:: python
my_layer = layer(fluid.dygraph.Layer)
my_layer = layer(fluid.Layer)
param_path = "./my_paddle_model"
param_dict = fluid.dygraph.load_persistables(my_layer.parameters(), param_path)
param_1 = param_dict['PtbModel_0.w_1']
or:
my_layer = layer(fluid.dygraph.Layer)
param_path = "./my_paddle_model"
filename = "model.file"
param_dict = fluid.dygraph.load_persistables(my_layer.state_dict(), param_path,
filename=filename)
param_1 = param_dict['PtbModel_0.w_1']
"""
if isinstance(vardict, collections.OrderedDict):
return _load_var_from_file(vardict, dirname, filename)
......
......@@ -16,7 +16,7 @@ from __future__ import print_function
import copy
import six
from ..framework import Parameter, _in_dygraph_mode
from ..framework import Parameter, in_dygraph_mode
from ..param_attr import ParamAttr
from .. import core
from six.moves import zip
......
......@@ -139,14 +139,14 @@ class Layer(core.Layer):
def clear_gradients(self):
for p in self.parameters():
p._clear_gradient()
p.clear_gradient()
def _build_once(self, *args):
def build_once(self, *args):
pass
def __call__(self, *inputs):
if not self._built:
self._build_once(*inputs)
self.build_once(*inputs)
outputs = self.forward(*inputs)
self._built = True
......
此差异已折叠。
......@@ -712,10 +712,6 @@ class Executor(object):
if dataset == None:
raise RuntimeError("dataset is needed and should be initialized")
if not isinstance(self.place, core.CPUPlace):
raise RuntimeError("infer_from_dataset is verified on CPUPlace"
"We will open CUDAPlace in the future")
scope, trainer = self._prepare_trainer(
program=program,
dataset=dataset,
......@@ -796,10 +792,6 @@ class Executor(object):
if dataset == None:
raise RuntimeError("dataset is need and should be initialized")
if not isinstance(self.place, core.CPUPlace):
raise RuntimeError("train_from_dataset is verified on CPUPlace"
"We will open CUDAPlace in the future")
scope, trainer = self._prepare_trainer(
program=program,
dataset=dataset,
......
......@@ -67,6 +67,7 @@ __all__ = [
'cuda_places',
'cpu_places',
'cuda_pinned_places',
'in_dygraph_mode',
]
EMPTY_VAR_NAME = core.kEmptyVarName()
......@@ -79,7 +80,10 @@ _dygraph_tracer_ = None
_dygraph_current_expected_place_ = None
def _in_dygraph_mode():
def in_dygraph_mode():
'''
Returns(bool): True if the program is running in dynamic graph mode
'''
return _dygraph_tracer_ is not None
......@@ -396,7 +400,7 @@ class Variable(object):
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
if _in_dygraph_mode():
if in_dygraph_mode():
# record vars in tracer rather than blocks
self._ivar = kwargs.get("ivar", None)
if not self._ivar:
......@@ -482,21 +486,21 @@ class Variable(object):
self.block.vars[name] = self
self.op = None
self.stop_gradient = stop_gradient
self._stop_gradient = stop_gradient
self.is_data = is_data
def _numpy(self):
def numpy(self):
new_ivar = self._ivar._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor())
def _backward(self):
def backward(self):
self._ivar._run_backward()
def _gradient(self):
def gradient(self):
new_ivar = self._ivar._grad_ivar()._copy_to(core.CPUPlace(), True)
return np.array(new_ivar.value().get_tensor())
def _clear_gradient(self):
def clear_gradient(self):
self._ivar._clear_gradient()
def __str__(self):
......@@ -516,7 +520,7 @@ class Variable(object):
Returns:
str: The debug string.
"""
if _in_dygraph_mode():
if in_dygraph_mode():
# TODO(panyx0718): add more dygraph debug info.
return 'name %s, dtype: %s shape: %s' % (self.name, self.dtype,
self.shape)
......@@ -535,7 +539,7 @@ class Variable(object):
__repr__ = __str__
def _set_desc(self, input):
def set_desc(self, input):
"""
Set the variable description.
......@@ -548,43 +552,43 @@ class Variable(object):
self.desc = input
@property
def _stop_gradient(self):
if _in_dygraph_mode():
def stop_gradient(self):
if in_dygraph_mode():
return self._ivar.stop_gradient
else:
return self.stop_gradient
return self._stop_gradient
@_stop_gradient.setter
def _stop_gradient(self, s):
if _in_dygraph_mode():
@stop_gradient.setter
def stop_gradient(self, s):
if in_dygraph_mode():
self._ivar.stop_gradient = s
else:
self.stop_gradient = s
self._stop_gradient = s
@property
def persistable(self):
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.persistable
else:
return self.desc.persistable()
@persistable.setter
def persistable(self, p):
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.persistable
else:
self.desc.set_persistable(p)
@property
def name(self):
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.name
else:
return cpt.to_text(self.desc.name())
@name.setter
def name(self, new_name):
if _in_dygraph_mode():
if in_dygraph_mode():
self._ivar.name = new_name
else:
self.desc.set_name(new_name)
......@@ -592,14 +596,14 @@ class Variable(object):
@property
def shape(self):
# convert to tuple, make it as same as numpy API.
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.shape
else:
return tuple(self.desc.shape())
@property
def dtype(self):
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.dtype
else:
return self.desc.dtype()
......@@ -611,7 +615,7 @@ class Variable(object):
@property
def type(self):
if _in_dygraph_mode():
if in_dygraph_mode():
return self._ivar.dtype
else:
return self.desc.type()
......@@ -721,7 +725,7 @@ class Variable(object):
name=unique_name.generate(".".join(self.name)),
dtype=self.dtype,
persistable=self.persistable,
stop_gradient=self._stop_gradient, )
stop_gradient=self.stop_gradient, )
else:
return self
......@@ -930,7 +934,7 @@ class Operator(object):
inputs=None,
outputs=None,
attrs=None):
if _in_dygraph_mode():
if in_dygraph_mode():
if type is None:
raise ValueError(
"`type` to initialized an Operator can not be None.")
......@@ -1049,7 +1053,7 @@ class Operator(object):
for arg in out_args:
out_arg_names.append(cpt.to_text(arg.name))
# TODO(minqiyang): could we remove variable's op in static mode?
if not _in_dygraph_mode():
if not in_dygraph_mode():
arg.op = self
self.desc.set_output(out_proto.name, out_arg_names)
......@@ -1095,7 +1099,7 @@ class Operator(object):
@property
def type(self):
if _in_dygraph_mode():
if in_dygraph_mode():
return self.iop.type
else:
return self.desc.type()
......@@ -1638,7 +1642,7 @@ class Block(object):
Returns:
Operator: the append Operator.
"""
if _in_dygraph_mode():
if in_dygraph_mode():
op = Operator(
block=self,
desc=None,
......@@ -1710,7 +1714,7 @@ class Block(object):
return self.ops[start:end]
def _prepend_op(self, *args, **kwargs):
if _in_dygraph_mode():
if in_dygraph_mode():
op = Operator(
self,
None,
......
......@@ -165,7 +165,7 @@ class ConstantInitializer(Initializer):
'force_cpu': self._force_cpu or force_init_on_cpu()
},
stop_gradient=True)
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -245,7 +245,7 @@ class UniformInitializer(Initializer):
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -324,7 +324,7 @@ class NormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -403,7 +403,7 @@ class TruncatedNormalInitializer(Initializer):
outputs={"Out": var},
attrs={"in_dtype": out_var.dtype,
"out_dtype": var.dtype})
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -509,7 +509,7 @@ class XavierInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -610,7 +610,7 @@ class MSRAInitializer(Initializer):
"seed": self._seed
},
stop_gradient=True)
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -709,7 +709,7 @@ class BilinearInitializer(Initializer):
'shape': list(shape),
value_name: values
})
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......@@ -768,7 +768,7 @@ class NumpyArrayInitializer(Initializer):
value_name: values
},
stop_gradient=True)
if not framework._in_dygraph_mode():
if not framework.in_dygraph_mode():
var.op = op
return op
......
......@@ -17,7 +17,7 @@ from __future__ import print_function
import copy
import six
from .framework import Parameter, dtype_is_floating, _in_dygraph_mode
from .framework import Parameter, dtype_is_floating, in_dygraph_mode
from . import unique_name
from paddle.fluid.initializer import Constant, Xavier
from .param_attr import ParamAttr
......
......@@ -17,7 +17,7 @@ from __future__ import print_function
import copy
import numpy as np
from .framework import Variable, default_main_program, default_startup_program, _in_dygraph_mode, _current_expected_place
from .framework import Variable, default_main_program, default_startup_program, in_dygraph_mode, _current_expected_place
from . import unique_name
from .param_attr import ParamAttr, WeightNormParamAttr
from . import core
......@@ -54,7 +54,7 @@ class LayerHelperBase(object):
Return Variable construct from value
"""
if isinstance(value, np.ndarray):
assert _in_dygraph_mode(
assert in_dygraph_mode(
), "to_variable could only be called in dygraph mode"
if not block:
......@@ -302,7 +302,7 @@ class LayerHelperBase(object):
param = self._create_weight_normalize(attr, shape, dtype)
WeightNormParamAttr.params_with_weight_norm.append(param)
return param
if _in_dygraph_mode():
if in_dygraph_mode():
# In dygraph mode, we want the returned parameter to be
# initialized so that it can be used imperatively.
return self.main_program.global_block().create_parameter(
......@@ -370,7 +370,7 @@ class LayerHelperBase(object):
initializer: initializer to use
"""
assert isinstance(var, Variable)
if _in_dygraph_mode():
if in_dygraph_mode():
initializer(var, var.block)
else:
self.startup_program.global_block().create_var(
......
......@@ -35,8 +35,8 @@ from ..dygraph import learning_rate_scheduler as imperate_lr
__all__ = [
'exponential_decay', 'natural_exp_decay', 'inverse_time_decay',
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'append_LARS',
'cosine_decay', 'linear_lr_warmup'
'polynomial_decay', 'piecewise_decay', 'noam_decay', 'cosine_decay',
'linear_lr_warmup'
]
......@@ -349,24 +349,26 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
training progresses. By using this function, the learning rate will be decayed by
following cosine decay strategy.
decayed_lr = learning_rate * 0.5 * (math.cos(epoch * math.pi / epochs) + 1)
.. math::
decayed\_lr = learning\_rate * 0.5 * (math.cos * (epoch * \\frac{math.pi}{epochs} ) + 1)
Args:
learning_rate(Variable|float): The initial learning rate.
step_each_epoch(int): the number of steps in an epoch.
epochs(int): the number of epochs.
Returns:
Variable: The decayed learning rate.
Examples:
Returns:
Variable: The decayed learning rate.
..code-block:: python
Examples:
.. code-block:: python
base_lr = 0.1
lr = fluid.layers.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
base_lr = 0.1
lr = fluid.layers.cosine_decay(
learning_rate = base_lr, step_each_epoch=10000, epochs=120)
"""
with default_main_program()._lr_schedule_guard():
if imperative_base.enabled():
decay = imperate_lr.CosineDecay(learning_rate, step_each_epoch,
......@@ -381,50 +383,6 @@ def cosine_decay(learning_rate, step_each_epoch, epochs):
return decayed_lr
def append_LARS(params_grads, learning_rate, weight_decay):
"""
Applies LARS (LAYER-WISE ADAPTIVE RATE SCALING) to learning rate for
each layer.
Args:
learning_rate: A learning rate Variable. This
is the global learning rate for LARS.
weight_decay: A Python `float` number.
Returns:
The decayed learning rate
Examples:
.. code-block:: python
learning_rate *= local_gw_ratio * sqrt(sumsq(param))
/ (sqrt(sumsq(gradient))+ weight_decay * sqrt(sumsq(param)))
"""
assert not imperative_base.enabled(
), "append_LARS is NOT supported in dygraph mode now"
def _balanced_weight(param_norm, grad_norm):
if weight_decay == 1.0:
return grad_norm + param_norm
else:
return grad_norm + weight_decay * param_norm
for param, grad in params_grads:
with param.block.program.optimized_guard(
[param, grad]), name_scope("optimizer"):
param_lr = param.optimize_attr['learning_rate']
param_norm = ops.sqrt(nn.reduce_sum(input=ops.square(param)))
grad_norm = ops.sqrt(nn.reduce_sum(input=ops.square(grad)))
if type(param_lr) == float and param_lr == 1.0:
decayed_lr = learning_rate * param_norm \
/ _balanced_weight(param_norm, grad_norm)
else:
decayed_lr = learning_rate * param_lr * param_norm \
/ _balanced_weight(param_norm, grad_norm)
# set back param local learning rate
param.optimize_attr['learning_rate'] = decayed_lr
def linear_lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
"""
Applies linear learning rate warmup before the normal learning rate
......
......@@ -23,7 +23,7 @@ import os
import inspect
from ..layer_helper import LayerHelper
from ..initializer import Normal, Constant, NumpyArrayInitializer
from ..framework import Variable, OpProtoHolder, _in_dygraph_mode
from ..framework import Variable, OpProtoHolder, in_dygraph_mode
from ..dygraph import base
from ..param_attr import ParamAttr
from .layer_function_generator import autodoc, templatedoc, _generate_doc_string_
......@@ -73,6 +73,8 @@ __all__ = [
'reduce_max',
'reduce_min',
'reduce_prod',
'reduce_all',
'reduce_any',
'sequence_first_step',
'sequence_last_step',
'sequence_slice',
......@@ -159,6 +161,7 @@ __all__ = [
'sum',
'slice',
'shape',
'rank',
'logical_and',
'logical_or',
'logical_xor',
......@@ -482,7 +485,7 @@ def dynamic_lstm(input,
forward, _ = fluid.layers.dynamic_lstm(
input=forward_proj, size=hidden_dim * 4, use_peepholes=False)
"""
assert _in_dygraph_mode(
assert in_dygraph_mode(
) is not True, "please use lstm instead of dynamic_lstm in dygraph mode!"
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
helper = LayerHelper('lstm', **locals())
......@@ -868,7 +871,7 @@ def dynamic_lstmp(input,
proj_activation="tanh")
"""
assert _in_dygraph_mode(
assert in_dygraph_mode(
) is not True, "please use lstm instead of dynamic_lstmp in dygraph mode!"
assert bias_attr is not False, "bias_attr should not be False in dynamic_lstmp."
......@@ -1042,7 +1045,7 @@ def dynamic_gru(input,
hidden = fluid.layers.dynamic_gru(input=x, size=hidden_dim)
"""
assert _in_dygraph_mode(
assert in_dygraph_mode(
) is not True, "please use gru instead of dynamic_gru in dygraph mode!"
helper = LayerHelper('gru', **locals())
......@@ -1761,7 +1764,7 @@ def sequence_conv(input,
Variable: output of sequence_conv
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_conv', **locals())
dtype = helper.input_dtype()
......@@ -1822,7 +1825,7 @@ def sequence_softmax(input, use_cudnn=False, name=None):
dtype='float32', lod_level=1)
x_sequence_softmax = fluid.layers.sequence_softmax(input=x)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_softmax', **locals())
dtype = helper.input_dtype()
......@@ -2316,7 +2319,7 @@ def sequence_pool(input, pool_type, is_test=False):
last_x = fluid.layers.sequence_pool(input=x, pool_type='last')
first_x = fluid.layers.sequence_pool(input=x, pool_type='first')
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_pool', **locals())
dtype = helper.input_dtype()
......@@ -2357,7 +2360,7 @@ def sequence_concat(input, name=None):
out = fluid.layers.sequence_concat(input=[seq1, seq2, seq3])
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_concat', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
......@@ -2486,7 +2489,7 @@ def sequence_slice(input, offset, length, name=None):
subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset,
length=length)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper("sequence_slice", **locals())
dtype = helper.input_dtype()
......@@ -3308,7 +3311,7 @@ def layer_norm(input,
>>> dtype='float32')
>>> x = fluid.layers.layer_norm(input=data, begin_norm_axis=1)
"""
assert _in_dygraph_mode(
assert in_dygraph_mode(
) is not True, "please use FC instead of fc in dygraph mode!"
helper = LayerHelper('layer_norm', **locals())
dtype = helper.input_dtype()
......@@ -3947,7 +3950,7 @@ def sequence_expand(x, y, ref_level=-1, name=None):
dtype='float32', lod_level=1)
out = layers.sequence_expand(x=x, y=y, ref_level=0)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_expand', input=x, **locals())
dtype = helper.input_dtype()
......@@ -4015,7 +4018,7 @@ def sequence_expand_as(x, y, name=None):
dtype='float32', lod_level=1)
out = layers.sequence_expand_as(x=x, y=y)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_expand_as', input=x, **locals())
dtype = helper.input_dtype()
......@@ -4063,7 +4066,7 @@ def sequence_pad(x, pad_value, maxlen=None, name=None):
out = fluid.layers.sequence_pad(x=x, pad_value=pad_value)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_pad', input=x, **locals())
dtype = helper.input_dtype()
......@@ -4131,7 +4134,7 @@ def sequence_unpad(x, length, name=None):
out = fluid.layers.sequence_unpad(x=x, length=len)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_unpad', input=x, **locals())
dtype = helper.input_dtype()
......@@ -4739,6 +4742,106 @@ def reduce_prod(input, dim=None, keep_dim=False, name=None):
return out
def reduce_all(input, dim=None, keep_dim=False, name=None):
"""
Computes the ``logical and`` of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimension along which the logical and is computed.
If :attr:`None`, compute the logical and over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a bool Tensor variable with following elements:
# [[True, False]
# [True, True]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_all(x) # False
fluid.layers.reduce_all(x, dim=0) # [True, False]
fluid.layers.reduce_all(x, dim=-1) # [False, True]
fluid.layers.reduce_all(x, dim=1,
keep_dim=True) # [[False], [True]]
"""
helper = LayerHelper('reduce_all', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_all',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
return out
def reduce_any(input, dim=None, keep_dim=False, name=None):
"""
Computes the ``logical or`` of tensor elements over the given dimension.
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (list|int|None): The dimension along which the logical or is computed.
If :attr:`None`, compute the logical or over all elements of
:attr:`input` and return a Tensor variable with a single element,
otherwise must be in the range :math:`[-rank(input), rank(input))`.
If :math:`dim[i] < 0`, the dimension to reduce is :math:`rank + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
than the :attr:`input` unless :attr:`keep_dim` is true.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
Returns:
Variable: The reduced Tensor variable.
Examples:
.. code-block:: python
# x is a bool Tensor variable with following elements:
# [[True, False]
# [False, False]]
# Each example is followed by the correspending output tensor.
fluid.layers.reduce_any(x) # True
fluid.layers.reduce_any(x, dim=0) # [True, False]
fluid.layers.reduce_any(x, dim=-1) # [True, False]
fluid.layers.reduce_any(x, dim=1,
keep_dim=True) # [[True], [False]]
"""
helper = LayerHelper('reduce_any', **locals())
out = helper.create_variable_for_type_inference(dtype=helper.input_dtype())
if dim is not None and not isinstance(dim, list):
dim = [dim]
helper.append_op(
type='reduce_any',
inputs={'X': input},
outputs={'Out': out},
attrs={
'dim': dim if dim != None else [0],
'keep_dim': keep_dim,
'reduce_all': True if dim == None else False
})
return out
def split(input, num_or_sections, dim=-1, name=None):
"""
Split the input tensor into multiple sub-tensors.
......@@ -4820,7 +4923,7 @@ def l2_normalize(x, axis, epsilon=1e-12, name=None):
the dimension to normalization is rank(X) + axis. -1 is the
last dimension.
epsilon(float): The epsilon value is used to avoid division by zero, \
the defalut value is 1e-10.
the defalut value is 1e-12.
name(str|None): A name for this layer(optional). If set None, the layer \
will be named automatically.
......@@ -5306,7 +5409,7 @@ def sequence_reshape(input, new_dim):
x = fluid.layers.data(shape=[5, 20], dtype='float32', lod_level=1)
x_reshaped = fluid.layers.sequence_reshape(input=x, new_dim=10)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_reshape', **locals())
out = helper.create_variable_for_type_inference(helper.input_dtype())
......@@ -5842,7 +5945,7 @@ def im2sequence(input,
input=layer, stride=[1, 1], filter_size=[2, 2])
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
if isinstance(filter_size, int):
......@@ -6486,7 +6589,7 @@ def squeeze(input, axes, name=None):
x = layers.data(name='x', shape=[5, 1, 10])
y = layers.sequeeze(input=x, axes=[1])
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"squeeze layer is not supported in dygraph mode yet.")
helper = LayerHelper("squeeze", **locals())
out = helper.create_variable_for_type_inference(dtype=input.dtype)
......@@ -7139,10 +7242,10 @@ def image_resize(input,
out_shape(list|tuple|Variable|None): Output shape of image resize
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width.
At least one of out_shape or scale must be set.
And out_shape has a higher priority than scale.
Default: None
scale(float|None): The multiplier for the input height or width. At
least one of :attr:`out_shape` or :attr:`scale` must be set.
And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None.
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
resample(str): The resample method. It supports 'BILINEAR' and 'NEAREST'
......@@ -7180,6 +7283,7 @@ def image_resize(input,
or 'NEAREST' currently.
ValueError: One of out_shape and scale must not be None.
ValueError: out_shape length should be 2.
ValueError: scale should be greater than zero.
TypeError: align_corners shoule be a bool value
ValueError: align_mode can only be '0' or '1'
......@@ -7211,26 +7315,36 @@ def image_resize(input,
def _is_list_or_turple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
out_h = 0
out_w = 0
inputs = {"X": input}
attrs = {
"out_h": 0,
"out_w": 0,
"interp_method": resample_type,
"align_corners": align_corners,
"align_mode": align_mode
}
if out_shape is not None:
if isinstance(out_shape, Variable):
warnings.warn("out_shape as Variable type is deprecated, \
it is recommended to use actual_shape instead of \
out_shape to specify output shape dynamically.")
inputs['OutSize'] = out_shape
elif not (_is_list_or_turple_(out_shape)):
raise TypeError("out_shape should be a list or tuple or Variable.")
elif len(out_shape) != 2:
raise ValueError("out_shape length should be 2.")
out_shape = list(map(int, out_shape))
out_h = out_shape[0]
out_w = out_shape[1]
else:
if not (_is_list_or_turple_(out_shape)):
raise TypeError(
"out_shape should be a list or tuple or Variable.")
if len(out_shape) != 2:
raise ValueError("out_shape length should be 2.")
out_shape = list(map(int, out_shape))
attrs['out_h'] = out_shape[0]
attrs['out_w'] = out_shape[1]
else:
out_h = int(input.shape[2] * scale)
out_w = int(input.shape[3] * scale)
if scale <= 0:
raise ValueError("scale should be greater than zero.")
attrs['scale'] = float(scale)
if isinstance(actual_shape, Variable):
inputs["OutSize"] = actual_shape
......@@ -7242,13 +7356,7 @@ def image_resize(input,
type='{}_interp'.format(resample_type),
inputs=inputs,
outputs={"Out": out},
attrs={
"out_h": out_h,
"out_w": out_w,
"interp_method": resample_type,
"align_corners": align_corners,
"align_mode": align_mode
})
attrs=attrs)
return out
......@@ -7316,11 +7424,14 @@ def resize_bilinear(input,
Args:
input(${x_type}): ${x_comment}.
out_shape(${out_size_type}): ${out_size_comment}.
out_shape(list|tuple|Variable|None): Output shape of resize bilinear
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width. At
least one of out_shape or scale must be set. And out_shape has
a higher priority than scale. Default: None.
least one of :attr:`out_shape` or :attr:`scale` must be set.
And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None.
name(str|None): The output variable name.
actual_shape(Variable): An optional input to specify output shape
......@@ -7407,11 +7518,14 @@ def resize_nearest(input,
Args:
input(${x_type}): ${x_comment}.
out_shape(${out_size_type}): ${out_size_comment}.
out_shape(list|tuple|Variable|None): Output shape of resize nearest
layer, the shape is (out_h, out_w).
Default: None
scale(float|None): The multiplier for the input height or width. At
least one of out_shape or scale must be set. And out_shape has
a higher priority than scale. Default: None.
least one of :attr:`out_shape` or :attr:`scale` must be set.
And :attr:`out_shape` has a higher priority than :attr:`scale`.
Default: None.
name(str|None): The output variable name.
actual_shape(Variable): An optional input to specify output shape
......@@ -7621,7 +7735,7 @@ def sequence_scatter(input, index, updates, name=None):
output = fluid.layers.sequence_scatter(input, index, updates)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_scatter', **locals())
dtype = helper.input_dtype()
......@@ -8711,7 +8825,7 @@ def sequence_enumerate(input, win_size, pad_value=0, name=None):
x = fluid.layers.data(shape[30, 1], dtype='int32', lod_level=1)
out = fluid.layers.sequence_enumerate(input=x, win_size=3, pad_value=0)
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_enumerate', **locals())
out = helper.create_variable_for_type_inference(
......@@ -8752,7 +8866,7 @@ def sequence_mask(x, maxlen=None, dtype='int64', name=None):
Variable: The output sequence mask.
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper('sequence_mask', **locals())
......@@ -9227,11 +9341,37 @@ def shape(input):
return out
def rank(input):
"""
**Rank Layer**
Returns the number of dimensions for a tensor, which is a 0-D int32 Tensor.
Args:
input (Variable): The input variable.
Returns:
Variable: The rank of the input variable.
Examples:
.. code-block:: python
input = layers.data(
name="input", shape=[3, 100, 100], dtype="float32")
rank = layers.rank(input) # 4
"""
ndims = len(input.shape)
out = assign(np.array(ndims, 'int32'))
return out
def _elementwise_op(helper):
op_type = helper.layer_type
x = helper.kwargs.get('x', None)
y = helper.kwargs.get('y', None)
if _in_dygraph_mode():
if in_dygraph_mode():
x = base.to_variable(x)
y = base.to_variable(y)
......@@ -9804,7 +9944,7 @@ def sequence_reverse(x, name=None):
Returns:
out(${y_type}): ${y_comment}
"""
assert not _in_dygraph_mode(), (
assert not in_dygraph_mode(), (
"sequence layer is not supported in dygraph mode yet.")
helper = LayerHelper("sequence_reverse", **locals())
if name is None:
......@@ -10992,7 +11132,7 @@ def pixel_shuffle(x, upscale_factor):
Returns:
Out(Variable): the pixel shuffle result is a tensor variable with the same shape and the same type as the input.
Out(Variable): Reshaped tensor according to the new dimension.
Raises:
......
......@@ -24,26 +24,11 @@ from .layer_function_generator import templatedoc
import numpy
__all__ = [
'create_tensor',
'create_parameter',
'create_global_var',
'cast',
'tensor_array_to_tensor',
'concat',
'sums',
'assign',
'fill_constant_batch_size_like',
'fill_constant',
'argmin',
'argmax',
'argsort',
'ones',
'zeros',
'reverse',
'has_inf',
'has_nan',
'isfinite',
'range',
'create_tensor', 'create_parameter', 'create_global_var', 'cast',
'tensor_array_to_tensor', 'concat', 'sums', 'assign',
'fill_constant_batch_size_like', 'fill_constant', 'argmin', 'argmax',
'argsort', 'ones', 'zeros', 'reverse', 'has_inf', 'has_nan', 'isfinite',
'range', 'linspace'
]
......@@ -826,3 +811,45 @@ def range(start, end, step, dtype):
'Step': step},
outputs={'Out': [out]})
return out
def linspace(start, stop, num, dtype):
"""
Return fixed number of evenly spaced values within a given interval.
First entry is start, and last entry is stop. In the case when Num is 1, only Start is returned. Like linspace function of numpy.
Args:
start(float|Variable): First entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'.
stop(float|Variable): Last entry in the sequence. It is a float scalar, or a tensor of shape [1] with type 'float32'|'float64'.
num(int|Variable): Number of entry in the sequence. It is an int scalar, or a tensor of shape [1] with type int32.
dtype(string): 'float32'|'float64', the data type of the output tensor.
Returns:
Variable: The tensor variable storing a 1-D tensor.
Examples:
.. code-block:: python
data = fluid.layers.linspace(0, 10, 5, 'float32') # [0.0, 2.5, 5.0, 7.5, 10.0]
data = fluid.layers.linspace(0, 10, 1, 'float32') # [0.0]
"""
helper = LayerHelper("linspace", **locals())
if not isinstance(start, Variable):
start = fill_constant([1], dtype, start)
if not isinstance(stop, Variable):
stop = fill_constant([1], dtype, stop)
if not isinstance(num, Variable):
num = fill_constant([1], 'int32', num)
out = helper.create_variable_for_type_inference(dtype=start.dtype)
helper.append_op(
type='linspace',
inputs={'Start': start,
'Stop': stop,
'Num': num},
outputs={'Out': [out]})
return out
......@@ -55,7 +55,7 @@ class Optimizer(object):
"""
def __init__(self, learning_rate, regularization=None, name=None):
if framework._in_dygraph_mode():
if framework.in_dygraph_mode():
if not isinstance(learning_rate, float) and \
not isinstance(learning_rate, LearningRateDecay):
raise TypeError(
......@@ -205,7 +205,7 @@ class Optimizer(object):
name = self._name + "_" + name
if (name in self._accumulators and
param.name in self._accumulators[name]):
if framework._in_dygraph_mode():
if framework.in_dygraph_mode():
return self._accumulators[name][param.name]
raise Exception("Accumulator {} already exists for parameter {}".
format(name, param.name))
......@@ -363,7 +363,7 @@ class Optimizer(object):
See examples in `apply_gradients`.
"""
self._dtype = loss.dtype
if framework._in_dygraph_mode():
if framework.in_dygraph_mode():
if parameter_list is not None:
parameters = parameter_list
else:
......@@ -448,7 +448,7 @@ class Optimizer(object):
Returns:
list: A list of operators appended to the current program.
"""
if framework._in_dygraph_mode():
if framework.in_dygraph_mode():
with program_guard(framework.default_main_program(),
framework.default_startup_program()):
optimize_ops = self._create_optimization_pass(params_grads)
......@@ -628,16 +628,16 @@ class DGCMomentumOptimizer(MomentumOptimizer):
Original paper is https://arxiv.org/abs/1712.01887
DGC reduce the communication bandwidth by sending only the important gradients (sparse update):\
DGC reduces the communication bandwidth by sending only the important gradients (sparse update):\
only gradients larger than a threshold are transmitted.
To avoid losing information, DGC accumulate the rest of the gradients locally.
To avoid losing information, DGC accumulates the rest of the gradients locally.
Eventually, these gradients become large enough to be transmitted.
Thus, DGC send the large gradients immediately but eventually send all of the gradients over time.
Thus, DGC sends the large gradients immediately but eventually send all of the gradients over time.
To ensure no loss of accuracy, DGC employs momentum correc-tionandlocal gradient clipping on top of the gradient sparsification to maintain model performance.
To ensure no loss of accuracy, DGC employs momentum correction and local gradient clipping on top of the gradient sparsification to maintain model performance.
DGC also uses momentum factor masking and warmup training to overcome the staleness problem caused by reduced communication.
......@@ -652,7 +652,7 @@ class DGCMomentumOptimizer(MomentumOptimizer):
learning_rate (float|Variable): the learning rate used to update parameters. \
Can be a float value or a Variable with one float value as data element.
momentum (float): Momentum factor.
rampup_begin_step (int): The begining step from which gradient compression is implemented.
rampup_begin_step (int): The beginning step from which gradient compression is implemented.
rampup_step (int): How long it use the sparsity periods. Default is 1.
for example: If the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 5, \
it will use 0.75 at 0 step, and 0.9375 at 1 step, and so on. And when reach sparsity array ends, \
......@@ -660,9 +660,9 @@ class DGCMomentumOptimizer(MomentumOptimizer):
sparsity (list[float]): Get top important element from gradient tensor, the ratio is (1 - current sparsity).
use_nesterov (bool): Enables Nesterov momentum. True means use nesterov.
local_grad_clip_norm (float): Clip norm value if needed.
num_trainers: The number of training node.
num_trainers: The number of training nodes.
regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix.
name: An optional name prefix.
Examples:
.. code-block:: python
......
......@@ -6,4 +6,6 @@ foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
add_subdirectory(high-level-api)
if(WITH_HIGH_LEVEL_API_TEST)
add_subdirectory(high-level-api)
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*_new_api.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
# This test is buggy
# py_test(test_understand_sentiment_dynamic_rnn SRCS
# test_understand_sentiment_dynamic_rnn.py SERIAL)
LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn_new_api)
add_subdirectory(fit_a_line)
add_subdirectory(recognize_digits)
add_subdirectory(image_classification)
add_subdirectory(understand_sentiment)
add_subdirectory(label_semantic_roles)
add_subdirectory(word2vec)
add_subdirectory(recommender_system)
add_subdirectory(machine_translation)
if(NOT APPLE)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_image_classification_vgg_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_image_classification_resnet_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_conv_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_mlp_new_api")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
if(NOT APPLE)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_image_classification_vgg")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_image_classification_resnet")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
if(NOT APPLE)
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
else()
foreach(src ${TEST_OPS})
if(${src} STREQUAL "test_recognize_digits_conv")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
elseif(${src} STREQUAL "test_recognize_digits_mlp")
message(WARNING "These tests has been disabled in OSX for random fail: \n" ${src})
else()
py_test(${src} SRCS ${src}.py)
endif()
endforeach()
endif()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# This test is buggy
# py_test(test_understand_sentiment_dynamic_rnn SRCS
# test_understand_sentiment_dynamic_rnn.py SERIAL)
LIST(REMOVE_ITEM TEST_OPS test_understand_sentiment_dynamic_rnn)
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
file(GLOB TEST_OPS RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string(REPLACE ".py" "" TEST_OPS "${TEST_OPS}")
# default test
foreach(src ${TEST_OPS})
py_test(${src} SRCS ${src}.py)
endforeach()
......@@ -74,7 +74,6 @@ list(REMOVE_ITEM TEST_OPS test_dgc_op)
list(REMOVE_ITEM TEST_OPS test_dist_se_resnext_nccl)
list(REMOVE_ITEM TEST_OPS test_dist_transformer)
list(REMOVE_ITEM TEST_OPS test_parallel_executor_transformer)
list(REMOVE_ITEM TEST_OPS test_image_classification_resnet)
list(REMOVE_ITEM TEST_OPS test_bilinear_interp_op)
list(REMOVE_ITEM TEST_OPS test_nearest_interp_op)
list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
......@@ -125,10 +124,6 @@ if(NOT WIN32)
py_test_modules(test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL)
endif()
if(NOT APPLE)
py_test_modules(test_image_classification_resnet MODULES test_image_classification_resnet SERIAL)
endif()
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
# change the timeout from 600 to 2200, because in debug mode, this test need more time.
set_tests_properties(test_parallel_executor_seresnext PROPERTIES TIMEOUT 2200)
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import six
def fake_imdb_reader(word_dict_size,
sample_num,
lower_seq_len=100,
upper_seq_len=200,
class_dim=2):
def __reader__():
for _ in six.moves.range(sample_num):
length = np.random.random_integers(
low=lower_seq_len, high=upper_seq_len, size=[1])[0]
ids = np.random.random_integers(
low=0, high=word_dict_size - 1, size=[length]).astype('int64')
label = np.random.random_integers(
low=0, high=class_dim - 1, size=[1]).astype('int64')[0]
yield ids, label
return __reader__
......@@ -29,7 +29,8 @@ __all__ = ['TestParallelExecutorBase']
class TestParallelExecutorBase(unittest.TestCase):
def check_network_convergence(self,
@classmethod
def check_network_convergence(cls,
method,
use_cuda=True,
memory_opt=True,
......
......@@ -18,7 +18,7 @@ import numpy as np
import paddle.fluid as fluid
class L1(fluid.dygraph.Layer):
class L1(fluid.Layer):
def __init__(self, prefix):
super(L1, self).__init__(prefix)
self._param_attr = fluid.ParamAttr(
......@@ -32,7 +32,7 @@ class L1(fluid.dygraph.Layer):
return self.w1 + self.w2
class L2(fluid.dygraph.Layer):
class L2(fluid.Layer):
def __init__(self, prefix):
super(L2, self).__init__(prefix)
self.layer1 = L1(self.full_name())
......@@ -42,7 +42,7 @@ class L2(fluid.dygraph.Layer):
return self.layer1() + self.layer2()
class L3(fluid.dygraph.Layer):
class L3(fluid.Layer):
def __init__(self, prefix):
super(L3, self).__init__(prefix)
self.layer1 = L2(self.full_name())
......@@ -59,7 +59,7 @@ class TestBaseLayer(unittest.TestCase):
ret = l()
self.assertEqual(l.w1.name, "test_one_level/L1_0.w_0")
self.assertEqual(l.w2.name, "test_one_level/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.2 * np.ones([2, 2])))
self.assertTrue(np.allclose(ret.numpy(), 0.2 * np.ones([2, 2])))
def test_three_level(self):
with fluid.dygraph.guard():
......@@ -72,7 +72,7 @@ class TestBaseLayer(unittest.TestCase):
self.assertEqual(names[3], "test_three_level/L3_0/L2_0/L1_1.w_1")
self.assertEqual(names[4], "test_three_level/L3_0/L2_1/L1_0.w_0")
self.assertEqual(names[5], "test_three_level/L3_0/L2_1/L1_0.w_1")
self.assertTrue(np.allclose(ret._numpy(), 0.8 * np.ones([2, 2])))
self.assertTrue(np.allclose(ret.numpy(), 0.8 * np.ones([2, 2])))
if __name__ == '__main__':
......
......@@ -91,17 +91,26 @@ class TestBilinearInterpOp(OpTest):
self.op_type = "bilinear_interp"
input_np = np.random.random(self.input_shape).astype("float32")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape,
self.align_corners, self.align_mode)
if self.scale > 0:
out_h = int(self.input_shape[2] * self.scale)
out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size,
self.actual_shape, self.align_corners,
self.align_mode)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
if self.actual_shape is not None:
self.inputs['OutSize'] = self.actual_shape
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method,
'align_corners': self.align_corners,
'align_mode': self.align_mode
......@@ -119,6 +128,7 @@ class TestBilinearInterpOp(OpTest):
self.input_shape = [2, 3, 4, 4]
self.out_h = 2
self.out_w = 2
self.scale = 0.
self.out_size = np.array([3, 3]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -130,6 +140,7 @@ class TestBilinearInterpCase1(TestBilinearInterpOp):
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.scale = 0.
self.align_corners = True
self.align_mode = 1
......@@ -140,6 +151,7 @@ class TestBilinearInterpCase2(TestBilinearInterpOp):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.scale = 0.
self.align_corners = True
self.align_mode = 1
......@@ -150,6 +162,7 @@ class TestBilinearInterpCase3(TestBilinearInterpOp):
self.input_shape = [1, 1, 128, 64]
self.out_h = 64
self.out_w = 128
self.scale = 0.
self.align_corners = True
self.align_mode = 1
......@@ -160,6 +173,7 @@ class TestBilinearInterpCase4(TestBilinearInterpOp):
self.input_shape = [4, 1, 7, 8]
self.out_h = 1
self.out_w = 1
self.scale = 0.
self.out_size = np.array([2, 2]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -171,6 +185,7 @@ class TestBilinearInterpCase5(TestBilinearInterpOp):
self.input_shape = [3, 3, 9, 6]
self.out_h = 12
self.out_w = 12
self.scale = 0.
self.out_size = np.array([11, 11]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -182,6 +197,7 @@ class TestBilinearInterpCase6(TestBilinearInterpOp):
self.input_shape = [1, 1, 128, 64]
self.out_h = 64
self.out_w = 128
self.scale = 0.
self.out_size = np.array([65, 129]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -193,6 +209,7 @@ class TestBilinearInterpActualShape(TestBilinearInterpOp):
self.input_shape = [3, 2, 32, 16]
self.out_h = 64
self.out_w = 32
self.scale = 0.
self.out_size = np.array([66, 40]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -206,15 +223,25 @@ class TestBilinearInterpOpUint8(OpTest):
self.op_type = "bilinear_interp"
input_np = np.random.randint(
low=0, high=256, size=self.input_shape).astype("uint8")
output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
self.out_size, self.actual_shape,
self.align_corners, self.align_mode)
if self.scale > 0:
out_h = int(self.input_shape[2] * self.scale)
out_w = int(self.input_shape[3] * self.scale)
else:
out_h = self.out_h
out_w = self.out_w
output_np = bilinear_interp_np(input_np, out_h, out_w, self.out_size,
self.actual_shape, self.align_corners,
self.align_mode)
self.inputs = {'X': input_np}
if self.out_size is not None:
self.inputs['OutSize'] = self.out_size
self.attrs = {
'out_h': self.out_h,
'out_w': self.out_w,
'scale': self.scale,
'interp_method': self.interp_method,
'align_corners': self.align_corners,
'align_mode': self.align_mode
......@@ -229,6 +256,7 @@ class TestBilinearInterpOpUint8(OpTest):
self.input_shape = [1, 3, 9, 6]
self.out_h = 10
self.out_w = 9
self.scale = 0.
self.align_corners = True
self.align_mode = 1
......@@ -239,6 +267,7 @@ class TestBilinearInterpCase1Uint8(TestBilinearInterpOpUint8):
self.input_shape = [2, 3, 128, 64]
self.out_h = 120
self.out_w = 50
self.scale = 0.
self.align_corners = True
self.align_mode = 1
......@@ -249,6 +278,7 @@ class TestBilinearInterpCase2Uint8(TestBilinearInterpOpUint8):
self.input_shape = [4, 1, 7, 8]
self.out_h = 5
self.out_w = 13
self.scale = 0.
self.out_size = np.array([6, 15]).astype("int32")
self.align_corners = True
self.align_mode = 1
......@@ -272,5 +302,38 @@ class TestBilinearInterpWithMethod3(TestBilinearInterpOp):
self.align_mode = 0
class TestBilinearInterpScale1(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 2.
self.align_corners = True
self.align_mode = 1
class TestBilinearInterpScale2(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 1.
self.align_corners = True
self.align_mode = 1
class TestBilinearInterpScale3(TestBilinearInterpOp):
def init_test_case(self):
self.interp_method = 'bilinear'
self.input_shape = [2, 3, 16, 32]
self.out_h = 60
self.out_w = 25
self.scale = 1.5
self.align_corners = True
self.align_mode = 1
if __name__ == "__main__":
unittest.main()
......@@ -19,7 +19,7 @@ import time
import six
import unittest
EPOCH_NUM = 60
EPOCH_NUM = 20
BATCH_SIZE = 32
CLASS_NUM = 10
......
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册