diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc index b8ccd8e744dab1b2dcb31551893aa0df0180fbbc..f86b4b706b3e246629ec944e06857b88d3cfaad8 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_client.cc +++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.cc @@ -414,6 +414,16 @@ std::future BrpcPsClient::load(uint32_t table_id, return send_cmd(table_id, PS_LOAD_ONE_TABLE, {epoch, mode}); } +std::future BrpcPsClient::Load(const LoadSaveContext &load_context) { + if (load_context.table_id < 0) { + return send_cmd(-1, PS_LOAD_ALL_TABLE, + {load_context.epoch, load_context.mode}); + } else { + return send_cmd(load_context.table_id, PS_LOAD_ONE_TABLE, + {load_context.epoch, load_context.mode}); + } +} + std::future BrpcPsClient::save(const std::string &epoch, const std::string &mode) { VLOG(1) << "BrpcPsClient::save path " << epoch; @@ -427,6 +437,19 @@ std::future BrpcPsClient::save(uint32_t table_id, return send_save_cmd(table_id, PS_SAVE_ONE_TABLE, {epoch, mode}); } +std::future BrpcPsClient::Save(const LoadSaveContext &save_context) { + if (save_context.table_id < 0) { + VLOG(1) << "BrpcPsClient::save path " << save_context.epoch; + return send_save_cmd(-1, PS_SAVE_ALL_TABLE, + {save_context.epoch, save_context.mode}); + } else { + VLOG(1) << "BrpcPsClient::save one table path " << save_context.epoch + << " table_id " << save_context.table_id; + return send_save_cmd(save_context.table_id, PS_SAVE_ONE_TABLE, + {save_context.epoch, save_context.mode}); + } +} + std::future BrpcPsClient::clear() { return send_cmd(-1, PS_CLEAR_ALL_TABLE, {}); } @@ -505,6 +528,44 @@ std::future BrpcPsClient::barrier(size_t table_id, return send_cmd(table_id, PS_BARRIER, {std::to_string(barrier_type)}); } +std::future BrpcPsClient::Pull(RequestContext &pull_context) { + if (pull_context.value_type == Dense) { // pull dense + Region *dense_region = + reinterpret_cast(pull_context.dense_values); + pull_dense(dense_region, pull_context.num, pull_context.table); + } else { // pull sparse + uint64_t *keys = reinterpret_cast(pull_context.keys); + float **select_values = + reinterpret_cast(pull_context.sparse_values); + size_t table_id = pull_context.table; + size_t num = pull_context.num; + bool is_training = pull_context.is_training; + if (pull_context.training_mode == Geo) { // for geo + pull_sparse_param(select_values, table_id, keys, num, is_training); + } else if (pull_context.training_mode == Async) { // for async + pull_sparse(select_values, table_id, keys, num, is_training); + } + } +} + +std::future BrpcPsClient::Push(RequestContext &push_context) { + if (push_context.value_type == Dense) { // push dense + const Region *dense_region = push_context.push_context.push_dense_values; + push_dense(dense_region, push_context.num, push_context.table); + } else { // push sparse + size_t table_id = push_context.table; + size_t num = push_context.num; + bool is_training = push_context.is_training; + if (push_context.training_mode == Geo) { // for geo + // TODO(zhaocaibei) + } else if (push_context.training_mode == Async) { // for async + const uint64_t *keys = push_context.push_context.keys; + const float **update_values = push_context.push_context.push_values; + push_sparse(table_id, keys, update_values, num); + } + } +} + std::future BrpcPsClient::pull_geo_param(size_t table_id, std::vector *values, std::vector *keys, diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_client.h b/paddle/fluid/distributed/ps/service/brpc_ps_client.h 
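// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the original patch): how a caller might
// exercise the new context-based BrpcPsClient entry points added above.
// Assumes the template arguments elided by the diff rendering are
// std::future<int32_t> (matching the existing load()/save()/pull_dense()
// signatures), and that Region, ValueType and LoadSaveContext/RequestContext
// are the types introduced elsewhere in this patch. The table id and
// checkpoint path below are placeholders, not values taken from the patch.
#include "paddle/fluid/distributed/ps/service/brpc_ps_client.h"

namespace example {
using paddle::distributed::BrpcPsClient;
using paddle::distributed::Dense;
using paddle::distributed::LoadSaveContext;
using paddle::distributed::Region;
using paddle::distributed::RequestContext;

void DemoContextApi(BrpcPsClient* client, Region* regions, size_t region_num) {
  // Load every table from a checkpoint; table_id < 0 maps to PS_LOAD_ALL_TABLE.
  LoadSaveContext load_ctx;
  load_ctx.table_id = -1;
  load_ctx.epoch = "/path/to/checkpoint";  // placeholder path
  load_ctx.mode = "0";
  client->Load(load_ctx).wait();

  // Dense pull through the unified RequestContext instead of pull_dense().
  RequestContext pull_ctx;
  pull_ctx.value_type = Dense;
  pull_ctx.dense_values = regions;
  pull_ctx.num = region_num;
  pull_ctx.table = 0;  // placeholder table id
  client->Pull(pull_ctx);
}
}  // namespace example
// ---------------------------------------------------------------------------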
index 59ed59933db868ae4c05b69529a2c12fd0f689e2..8b0cb0741b4004fbad444a9919ec540289067f55 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_client.h +++ b/paddle/fluid/distributed/ps/service/brpc_ps_client.h @@ -163,12 +163,17 @@ class BrpcPsClient : public PSClient { std::future load(uint32_t table_id, const std::string &epoch, const std::string &mode) override; + std::future Load(const LoadSaveContext &load_context) override; + std::future save(const std::string &epoch, const std::string &mode) override; std::future save(uint32_t table_id, const std::string &epoch, const std::string &mode) override; + virtual std::future Save( + const LoadSaveContext &save_context) override; + std::future clear() override; std::future clear(uint32_t table_id) override; @@ -199,6 +204,10 @@ class BrpcPsClient : public PSClient { const uint64_t *keys, size_t num, bool is_training); + virtual std::future Pull(RequestContext &pull_context) override; + + virtual std::future Push(RequestContext &push_context) override; + virtual std::future print_table_stat(uint32_t table_id); virtual std::future barrier(size_t table_id, uint32_t barrier_type); diff --git a/paddle/fluid/distributed/ps/service/brpc_ps_server.h b/paddle/fluid/distributed/ps/service/brpc_ps_server.h index 4310c247438ceb9bff541fdd21e00ff70ff7b4fd..d81a3a5df07f1de534cd646138fecc4dc2c970e1 100644 --- a/paddle/fluid/distributed/ps/service/brpc_ps_server.h +++ b/paddle/fluid/distributed/ps/service/brpc_ps_server.h @@ -51,7 +51,7 @@ class BrpcPsServer : public PSServer { _server.Join(); return 0; } - virtual int32_t port(); + int32_t port(); private: virtual int32_t initialize(); diff --git a/paddle/fluid/distributed/ps/service/graph_brpc_server.h b/paddle/fluid/distributed/ps/service/graph_brpc_server.h index aee0190850753786ce0f083257458caf50a63d26..a978d97b296b0a529a121fcfb9723639421d1e5e 100644 --- a/paddle/fluid/distributed/ps/service/graph_brpc_server.h +++ b/paddle/fluid/distributed/ps/service/graph_brpc_server.h @@ -43,7 +43,7 @@ class GraphBrpcServer : public PSServer { _server.Join(); return 0; } - virtual int32_t port(); + int32_t port(); std::condition_variable *export_cv() { return &cv_; } diff --git a/paddle/fluid/distributed/ps/service/ps_client.h b/paddle/fluid/distributed/ps/service/ps_client.h index 21719fbdbf1d64ad26ae0053b73812440ed08b66..8a2bfbe31602be299366fdcbeb264e45a5c4f703 100644 --- a/paddle/fluid/distributed/ps/service/ps_client.h +++ b/paddle/fluid/distributed/ps/service/ps_client.h @@ -26,6 +26,7 @@ #include "paddle/fluid/distributed/ps/service/sendrecv.pb.h" #include "paddle/fluid/distributed/ps/table/accessor.h" #include "paddle/fluid/distributed/ps/table/graph/graph_node.h" +#include "paddle/fluid/distributed/ps/table/table.h" #include "paddle/fluid/platform/timer.h" namespace paddle { @@ -59,6 +60,41 @@ class PSClientClosure : public google::protobuf::Closure { std::vector>> _promises; }; +struct LoadSaveContext { + int table_id; + std::string epoch; + std::string mode; +}; + +enum TrainingMode { Async = 0, Sync = 1, Geo = 3 }; + +enum TrainingPhase { Init = 0, Train = 1, Save = 2 }; + +// enum ValueType { +// Sparse = 0, +// Dense = 1 +// }; + +struct PushContext { + const uint64_t *keys; + const float **push_values; + const Region *push_dense_values; +}; + +struct RequestContext { + int table; + TrainingMode training_mode; // 1 for async, 2 for geo, 3 for sync + TrainingPhase training_phase; // 1 for init, 2 for train + ValueType value_type; // 1 for sparse, 2 for dense + void *keys; + void **sparse_values; // 
for sparse values + Region *dense_values; // for dense values + PushContext push_context; + size_t num; + bool is_training; + void *callback; +}; + class PSClient { public: PSClient() {} @@ -86,6 +122,9 @@ class PSClient { // 指定table数据load virtual std::future load(uint32_t table_id, const std::string &epoch, const std::string &mode) = 0; + // context配置load选项 + virtual std::future Load(const LoadSaveContext &load_context) = 0; + // 全量table数据save value_accessor根据mode,可能有不同的save条件 virtual std::future save(const std::string &epoch, const std::string &mode) = 0; @@ -93,6 +132,8 @@ class PSClient { virtual std::future save(uint32_t table_id, const std::string &epoch, const std::string &mode) = 0; + virtual std::future Save(const LoadSaveContext &save_context) = 0; + // 清空table数据 virtual std::future clear() = 0; virtual std::future clear(uint32_t table_id) = 0; @@ -107,6 +148,8 @@ class PSClient { virtual std::future pull_dense(Region *regions, size_t region_num, size_t table_id) = 0; // 保留 + virtual std::future Push(RequestContext &push_context) = 0; + // firstly push dense param for parameter server // this is neccessary because dense weight initialized in trainer on cold // start @@ -117,6 +160,9 @@ class PSClient { virtual std::future push_dense(const Region *regions, size_t region_num, size_t table_id) = 0; + + virtual std::future Pull(RequestContext &pull_context) = 0; + // 使用keys进行pull请求,结果填充values // keys和values的个数均为num个,每个value占用select_size空间 // future结束前keys和values缓冲区不能再次使用 diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.cc b/paddle/fluid/distributed/ps/service/ps_local_client.cc index 972cce135f189bee6dbba9e0b89baa288816827b..9e364b6d3ed7aabe3cd3bc944e697e11ac808a33 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_client.cc +++ b/paddle/fluid/distributed/ps/service/ps_local_client.cc @@ -56,6 +56,19 @@ int32_t PsLocalClient::initialize() { return done(); } +std::future PsLocalClient::Load(const LoadSaveContext& load_context) { + if (load_context.table_id < 0) { + for (auto& it : _table_map) { + load(it.first, load_context.epoch, load_context.mode); + } + return done(); + } else { + auto* table_ptr = table(load_context.table_id); + table_ptr->load(load_context.epoch, load_context.mode); + return done(); + } +} + ::std::future PsLocalClient::save(const std::string& epoch, const std::string& mode) { // TODO @@ -74,6 +87,21 @@ int32_t PsLocalClient::initialize() { return done(); } +::std::future PsLocalClient::Save( + const LoadSaveContext& save_context) { + if (save_context.table_id < 0) { + for (auto& it : _table_map) { + save(it.first, save_context.epoch, save_context.mode); + } + return done(); + } else { + auto* table_ptr = table(save_context.table_id); + table_ptr->flush(); + table_ptr->save(save_context.epoch, save_context.mode); + return done(); + } +} + ::std::future PsLocalClient::clear() { // TODO return done(); @@ -93,6 +121,51 @@ int32_t PsLocalClient::initialize() { return done(); } +::std::future PsLocalClient::Pull(RequestContext& pull_context) { + if (pull_context.value_type == Dense) { // pull dense + Region* dense_region = reinterpret_cast(pull_context.dense_values); + pull_dense(dense_region, pull_context.num, pull_context.table); + } else { // pull sparse + uint64_t* keys = reinterpret_cast(pull_context.keys); + char** select_values = reinterpret_cast(pull_context.sparse_values); + size_t table_id = pull_context.table; + size_t num = pull_context.num; + pull_sparse_ptr(select_values, table_id, keys, num); + } +} + +::std::future 
PsLocalClient::Push(RequestContext& push_context) { + if (push_context.value_type == Dense) { // push dense + if (push_context.training_phase == Init) { + const Region* regions = push_context.push_context.push_dense_values; + size_t region_num = push_context.num; + push_dense_param(regions, region_num, push_context.table); + } else { + if (push_context.training_mode == Geo) { // geo + float* total_send_data = + reinterpret_cast(push_context.dense_values); + size_t total_send_data_size = push_context.num; + push_dense_raw_gradient(push_context.table, total_send_data, + total_send_data_size, push_context.callback); + } else { // async and sync + const Region* regions = push_context.push_context.push_dense_values; + size_t region_num = push_context.num; + push_dense(regions, region_num, push_context.table); + } + } + } else { // push sparse + if (push_context.training_mode == Async) { + const uint64_t* keys = push_context.push_context.keys; + const float** update_values = push_context.push_context.push_values; + size_t table_id = push_context.table; + size_t num = push_context.num; + push_sparse(table_id, keys, update_values, num); + } else { + // TODO + } + } +} + ::std::future PsLocalClient::pull_dense(Region* regions, size_t region_num, size_t table_id) { diff --git a/paddle/fluid/distributed/ps/service/ps_local_client.h b/paddle/fluid/distributed/ps/service/ps_local_client.h index e73974ac562861d86e679ddbc213335d10731281..83ca558e3d2cb1f62235cda06c221b0d9367b043 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_client.h +++ b/paddle/fluid/distributed/ps/service/ps_local_client.h @@ -39,12 +39,16 @@ class PsLocalClient : public PSClient { virtual ::std::future load(uint32_t table_id, const std::string& epoch, const std::string& mode) override; + virtual std::future Load( + const LoadSaveContext& load_context) override; virtual ::std::future save(const std::string& epoch, const std::string& mode) override; virtual ::std::future save(uint32_t table_id, const std::string& epoch, const std::string& mode) override; + virtual std::future Save( + const LoadSaveContext& save_context) override; virtual ::std::future clear() override; virtual ::std::future clear(uint32_t table_id) override; @@ -55,6 +59,10 @@ class PsLocalClient : public PSClient { virtual ::std::future pull_dense(Region* regions, size_t region_num, size_t table_id); + virtual ::std::future Pull(RequestContext& pull_context) override; + + virtual ::std::future Push(RequestContext& push_context) override; + virtual ::std::future push_dense(const Region* regions, size_t region_num, size_t table_id); diff --git a/paddle/fluid/distributed/ps/service/ps_local_server.h b/paddle/fluid/distributed/ps/service/ps_local_server.h index 91f8bc4c9127115c9b5595270973d011778c6262..31b52126fc5767b445dfb605ff46b3fbc63c620c 100644 --- a/paddle/fluid/distributed/ps/service/ps_local_server.h +++ b/paddle/fluid/distributed/ps/service/ps_local_server.h @@ -28,7 +28,6 @@ class PsLocalServer : public PSServer { virtual uint64_t start() { return 0; } virtual uint64_t start(const std::string &ip, uint32_t port) { return 0; } virtual int32_t stop() { return 0; } - virtual int32_t port() { return 0; } virtual int32_t configure( const PSParameter &config, PSEnvironment &env, size_t server_rank, const std::vector &server_sub_program = {}) { diff --git a/paddle/fluid/distributed/ps/service/server.cc b/paddle/fluid/distributed/ps/service/server.cc index 5f1974e3e610c6772457514759bff83db944bf52..893f671359e40ce632185c78bade16404d23afc0 100644 --- 
a/paddle/fluid/distributed/ps/service/server.cc +++ b/paddle/fluid/distributed/ps/service/server.cc @@ -67,8 +67,6 @@ int32_t PSServer::configure( _config = config.server_param(); _rank = server_rank; _environment = &env; - _shuffled_ins = - paddle::framework::MakeChannel>(); size_t shard_num = env.get_ps_servers().size(); const auto &downpour_param = _config.downpour_server_param(); diff --git a/paddle/fluid/distributed/ps/service/server.h b/paddle/fluid/distributed/ps/service/server.h index 160d4a612829531d619c69a0cd5e9cd091f94868..d2804405b41989cbd9b5bed0afaf6d481d0658db 100644 --- a/paddle/fluid/distributed/ps/service/server.h +++ b/paddle/fluid/distributed/ps/service/server.h @@ -69,11 +69,6 @@ class PSServer { const PSParameter &config, PSEnvironment &env, size_t server_rank, const std::vector &server_sub_program = {}); - // return server_ip - virtual std::string ip() { return butil::my_ip_cstr(); } - // return server_port - virtual int32_t port() = 0; - virtual uint64_t start(const std::string &ip, uint32_t port) = 0; virtual int32_t stop() = 0; @@ -94,15 +89,6 @@ class PSServer { return &_table_map; } - typedef std::function MsgHandlerFunc; - virtual int registe_pserver2pserver_msg_handler(int msg_type, - MsgHandlerFunc handler) { - _msg_handler_map[msg_type] = handler; - return 0; - } - - paddle::framework::Channel> _shuffled_ins; - protected: virtual int32_t initialize() = 0; @@ -111,7 +97,6 @@ class PSServer { ServerParameter _config; PSEnvironment *_environment; std::unordered_map> _table_map; - std::unordered_map _msg_handler_map; protected: std::shared_ptr scope_; diff --git a/paddle/fluid/distributed/ps/table/accessor.h b/paddle/fluid/distributed/ps/table/accessor.h index 7c91a6086498037e56f9b89dc13243cfeb827c5c..07c211bb9c12866e3646a0dbdebfba189eb2507e 100644 --- a/paddle/fluid/distributed/ps/table/accessor.h +++ b/paddle/fluid/distributed/ps/table/accessor.h @@ -45,6 +45,17 @@ struct DataConverter { std::string deconverter; }; +struct AccessorInfo { + size_t dim; + size_t size; + size_t select_size; + size_t select_dim; + size_t update_size; + size_t update_dim; + size_t mf_size; + size_t fea_dim; +}; + class ValueAccessor { public: ValueAccessor() {} @@ -68,6 +79,8 @@ class ValueAccessor { } virtual int initialize() = 0; + virtual void GetTableInfo(AccessorInfo& info) = 0; + // value维度 virtual size_t dim() = 0; // value各个维度的size @@ -163,6 +176,7 @@ class ValueAccessor { TableAccessorParameter _config; std::unordered_map> _data_coverter_map; + AccessorInfo _accessor_info; }; REGISTER_PSCORE_REGISTERER(ValueAccessor); } // namespace distributed diff --git a/paddle/fluid/distributed/ps/table/common_dense_table.cc b/paddle/fluid/distributed/ps/table/common_dense_table.cc index 607469e2f7b0d5df79d4cb7477e0eaa3f4a8323a..cc0f5867a3d651bca9323452d1eb97355de4c160 100644 --- a/paddle/fluid/distributed/ps/table/common_dense_table.cc +++ b/paddle/fluid/distributed/ps/table/common_dense_table.cc @@ -128,6 +128,21 @@ int32_t CommonDenseTable::set_global_lr(float* lr) { return 0; } +int32_t CommonDenseTable::Pull(TableContext& context) { + CHECK(context.value_type == Dense); + float* pull_values = context.pull_context.values; + return pull_dense(pull_values, context.num); +} + +int32_t CommonDenseTable::Push(TableContext& context) { + CHECK(context.value_type == Dense); + if (context.pull_context.values != nullptr) { + const float* values = context.push_context.values; + return push_dense(values, context.num); + } + return 0; +} + int32_t CommonDenseTable::pull_dense(float* pull_values, 
size_t num) { std::copy(values_[param_idx_].begin(), values_[param_idx_].end(), pull_values); diff --git a/paddle/fluid/distributed/ps/table/common_dense_table.h b/paddle/fluid/distributed/ps/table/common_dense_table.h index a4c0f29ddb8770c8adc0d6885929aaac8a028e90..cad49a0a449c4735a74261574436a78789694d9b 100644 --- a/paddle/fluid/distributed/ps/table/common_dense_table.h +++ b/paddle/fluid/distributed/ps/table/common_dense_table.h @@ -40,6 +40,8 @@ class CommonDenseTable : public DenseTable { const std::string& name); virtual int32_t initialize_value(); virtual int32_t initialize_optimizer(); + virtual int32_t Pull(TableContext& context); + virtual int32_t Push(TableContext& context); int32_t pull_dense(float* pull_values, size_t num) override; int32_t push_dense_param(const float* values, size_t num) override; int32_t push_dense(const float* values, size_t num) override; diff --git a/paddle/fluid/distributed/ps/table/common_graph_table.h b/paddle/fluid/distributed/ps/table/common_graph_table.h index 7946569525cc4bb1351046632dfe5894611c4b67..f6f127621b947c41122f7803a90f39b640713b8e 100644 --- a/paddle/fluid/distributed/ps/table/common_graph_table.h +++ b/paddle/fluid/distributed/ps/table/common_graph_table.h @@ -454,6 +454,9 @@ class GraphTable : public SparseTable { int32_t get_server_index_by_id(int64_t id); Node *find_node(int64_t id); + virtual int32_t Pull(TableContext &context) { return 0; } + virtual int32_t Push(TableContext &context) { return 0; } + virtual int32_t pull_sparse(float *values, const PullSparseValue &pull_value) { return 0; diff --git a/paddle/fluid/distributed/ps/table/common_sparse_table.cc b/paddle/fluid/distributed/ps/table/common_sparse_table.cc index b44d08b937a96c806142f5d7f1ba2ae0bcdb0f5e..45be53335e1a181f7c1e2abb7326ac6b9800703f 100644 --- a/paddle/fluid/distributed/ps/table/common_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/common_sparse_table.cc @@ -355,6 +355,32 @@ int32_t CommonSparseTable::pour() { return 0; } +int32_t CommonSparseTable::Pull(TableContext& context) { + CHECK(context.value_type == Sparse); + if (context.use_ptr) { + char** pull_values = context.pull_context.ptr_values; + const uint64_t* keys = context.pull_context.keys; + return pull_sparse_ptr(pull_values, keys, context.num); + } else { + float* pull_values = context.pull_context.values; + const PullSparseValue& pull_value = context.pull_context.pull_value; + return pull_sparse(pull_values, pull_value); + } +} + +int32_t CommonSparseTable::Push(TableContext& context) { + CHECK(context.value_type == Sparse); + if (context.pull_context.values != nullptr) { + const float* values = context.push_context.values; + const uint64_t* keys = context.push_context.keys; + return push_sparse(keys, values, context.num); + } else { + const float** values = context.push_context.ptr_values; + const uint64_t* keys = context.push_context.keys; + return push_sparse(keys, values, context.num); + } +} + int32_t CommonSparseTable::pull_sparse(float* pull_values, const PullSparseValue& pull_value) { auto shard_num = task_pool_size_; diff --git a/paddle/fluid/distributed/ps/table/common_sparse_table.h b/paddle/fluid/distributed/ps/table/common_sparse_table.h index 82481dcd584e42b9b2bca1bcc5862b361e372b05..138c5447420663eae5ad94ea03a84360a46f8b3d 100644 --- a/paddle/fluid/distributed/ps/table/common_sparse_table.h +++ b/paddle/fluid/distributed/ps/table/common_sparse_table.h @@ -121,6 +121,9 @@ class CommonSparseTable : public SparseTable { virtual int32_t push_dense(const float* values, size_t num) 
{ return 0; } // unused method end + virtual int32_t Pull(TableContext& context); + virtual int32_t Push(TableContext& context); + virtual int32_t initialize(); virtual int32_t initialize_shard() { return 0; } virtual int32_t initialize_value(); diff --git a/paddle/fluid/distributed/ps/table/common_table.h b/paddle/fluid/distributed/ps/table/common_table.h index bac826dfe0e20b42d5cc47467356bc5614383a44..3d291c0152246bffa748ea57cf1c96eff6f2f343 100644 --- a/paddle/fluid/distributed/ps/table/common_table.h +++ b/paddle/fluid/distributed/ps/table/common_table.h @@ -119,6 +119,9 @@ class BarrierTable : public Table { virtual void *get_shard(size_t shard_idx) { return 0; } + virtual int32_t Pull(TableContext &context) { return 0; } + virtual int32_t Push(TableContext &context) { return 0; } + int32_t pull_dense(float *values, size_t num) override { return 0; } int32_t push_dense(const float *values, size_t num) override { return 0; } diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_accessor.cc index 866bd8114ccea329123e16585c33366e759d5df8..43e143dca901bb8264f666a1e4fd89a52102d894 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.cc @@ -38,6 +38,16 @@ int CtrCommonAccessor::initialize() { return 0; } +void CtrCommonAccessor::GetTableInfo(AccessorInfo& info) { + info.dim = dim(); + info.size = size(); + info.select_dim = select_dim(); + info.select_size = select_size(); + info.update_dim = update_dim(); + info.update_size = update_size(); + info.fea_dim = fea_dim(); +} + size_t CtrCommonAccessor::dim() { return common_feature_value.dim(); } size_t CtrCommonAccessor::dim_size(size_t dim) { diff --git a/paddle/fluid/distributed/ps/table/ctr_accessor.h b/paddle/fluid/distributed/ps/table/ctr_accessor.h index 1e31fec04649b19882269fa9cce5f5d7fb4978c1..bc46217955a8a677a9e5e16f740e2636d633908f 100644 --- a/paddle/fluid/distributed/ps/table/ctr_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_accessor.h @@ -126,6 +126,7 @@ class CtrCommonAccessor : public ValueAccessor { virtual int initialize(); virtual ~CtrCommonAccessor() {} + virtual void GetTableInfo(AccessorInfo& info); // value维度 virtual size_t dim(); // value各个维度的size diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc index b07bcf70ad7af416fc66e036c0061f9556cc4eae..bccf1fdebafa03442047048825ef85207711b6b3 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.cc @@ -37,6 +37,16 @@ int DownpourCtrDoubleAccessor::initialize() { return 0; } +void DownpourCtrDoubleAccessor::GetTableInfo(AccessorInfo& info) { + info.dim = dim(); + info.size = size(); + info.select_dim = select_dim(); + info.select_size = select_size(); + info.update_dim = update_dim(); + info.update_size = update_size(); + info.fea_dim = fea_dim(); +} + size_t DownpourCtrDoubleAccessor::dim() { auto embedx_dim = _config.embedx_dim(); return DownpourCtrDoubleFeatureValue::dim(embedx_dim); diff --git a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h index d7c717ace098821c3434888c9ff0ad699c923867..d7942634e86003c484710aad1d969e4d6371cb7f 100644 --- a/paddle/fluid/distributed/ps/table/ctr_double_accessor.h +++ b/paddle/fluid/distributed/ps/table/ctr_double_accessor.h @@ -168,6 +168,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor { 
DownpourCtrDoubleAccessor() {} virtual ~DownpourCtrDoubleAccessor() {} virtual int initialize(); + virtual void GetTableInfo(AccessorInfo& info); // value维度 virtual size_t dim(); // value各个维度的size diff --git a/paddle/fluid/distributed/ps/table/depends/sparse_utils.h b/paddle/fluid/distributed/ps/table/depends/sparse_utils.h index 708f7786bf3b0975791fcc74dddf62d2eb01e450..98e0250acc4d686dbde561ffb03edeb96444c406 100644 --- a/paddle/fluid/distributed/ps/table/depends/sparse_utils.h +++ b/paddle/fluid/distributed/ps/table/depends/sparse_utils.h @@ -58,7 +58,7 @@ struct PullSparseValue { std::vector* offset_shard) const { offset_shard->reserve(numel_ / shard_num + 1); for (int x = 0; x < numel_; ++x) { - if (feasigns_[x] % shard_num == shard_id) { + if (int(feasigns_[x] % shard_num) == shard_id) { offset_shard->push_back(x); } } diff --git a/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.cc b/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.cc index 5f22c3a436f1f0b89e6289844a9c56fbe888625d..e8ca7430351de7cbdc1e98607d6d9b884b6a376a 100644 --- a/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.cc +++ b/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.cc @@ -37,6 +37,16 @@ int DownpourCtrAccessor::initialize() { return 0; } +void DownpourCtrAccessor::GetTableInfo(AccessorInfo& info) { + info.dim = dim(); + info.size = size(); + info.select_dim = select_dim(); + info.select_size = select_size(); + info.update_dim = update_dim(); + info.update_size = update_size(); + info.fea_dim = fea_dim(); +} + size_t DownpourCtrAccessor::dim() { auto embedx_dim = _config.embedx_dim(); return DownpourCtrFeatureValue::dim(embedx_dim); diff --git a/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.h b/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.h index 5de7b12e01f0d6e619ee14b852b7aa308ec3b497..11991ad044ff63353c9a898469ec915163c2dea9 100644 --- a/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.h +++ b/paddle/fluid/distributed/ps/table/downpour_ctr_accessor.h @@ -160,6 +160,7 @@ class DownpourCtrAccessor : public ValueAccessor { virtual ~DownpourCtrAccessor() {} virtual int initialize(); + virtual void GetTableInfo(AccessorInfo& info); // value维度 virtual size_t dim(); // value各个维度的size diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h index 89c4fc15ae27998da3a3c7c3092baa9eee9846a0..3b43f99543fddabfaa24fc7da562203fc3f0d633 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h +++ b/paddle/fluid/distributed/ps/table/memory_sparse_geo_table.h @@ -48,6 +48,8 @@ class MemorySparseGeoTable : public SparseTable { virtual int32_t save(const std::string& path, const std::string& param) { return 0; } + virtual int32_t Pull(TableContext& context) { return 0; } + virtual int32_t Push(TableContext& context) { return 0; } virtual int32_t flush() { return 0; } virtual int32_t shrink(const std::string& param) { return 0; } virtual void clear() { return; } diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc index 7ce6e9005cf56ca295a6620a209551e303c112f3..98454ca747d314d76bb63706e853ded835df736a 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/memory_sparse_table.cc @@ -390,6 +390,26 @@ std::pair MemorySparseTable::print_table_stat() { return {feasign_size, mf_size}; } +int32_t MemorySparseTable::Pull(TableContext& context) { + 
CHECK(context.value_type == Sparse); + if (context.use_ptr) { + char** pull_values = context.pull_context.ptr_values; + const uint64_t* keys = context.pull_context.keys; + return pull_sparse_ptr(pull_values, keys, context.num); + } else { + float* pull_values = context.pull_context.values; + const PullSparseValue& pull_value = context.pull_context.pull_value; + return pull_sparse(pull_values, pull_value); + } +} + +int32_t MemorySparseTable::Push(TableContext& context) { + CHECK(context.value_type == Sparse); + + const uint64_t* keys = context.push_context.keys; + return push_sparse(keys, context.push_context.ptr_values, context.num); +} + int32_t MemorySparseTable::pull_sparse(float* pull_values, const PullSparseValue& pull_value) { CostTimer timer("pserver_sparse_select_all"); diff --git a/paddle/fluid/distributed/ps/table/memory_sparse_table.h b/paddle/fluid/distributed/ps/table/memory_sparse_table.h index 5770f25f8f41dec286993d6b586959c8c0d3a0c0..d26c67319760da0496ae8a1c164adf0d5b63b1f2 100644 --- a/paddle/fluid/distributed/ps/table/memory_sparse_table.h +++ b/paddle/fluid/distributed/ps/table/memory_sparse_table.h @@ -48,6 +48,9 @@ class MemorySparseTable : public SparseTable { virtual int32_t push_dense(const float* values, size_t num) { return 0; } // unused method end + virtual int32_t Pull(TableContext& context); + virtual int32_t Push(TableContext& context); + virtual int32_t initialize(); virtual int32_t initialize_shard() { return 0; } virtual int32_t initialize_value(); diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc index 60514b4e19ffaf63f285e25f1355660fabe58d48..5bc58bc5a1108b5f342036d9bd72c96287458401 100644 --- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc +++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.cc @@ -61,6 +61,21 @@ int32_t SSDSparseTable::initialize() { return 0; } +int32_t SSDSparseTable::Pull(TableContext& context) { + CHECK(context.value_type == Sparse); + if (context.use_ptr) { + char** pull_values = context.pull_context.ptr_values; + const uint64_t* keys = context.pull_context.keys; + return pull_sparse_ptr(pull_values, keys, context.num); + } else { + float* pull_values = context.pull_context.values; + const PullSparseValue& pull_value = context.pull_context.pull_value; + return pull_sparse(pull_values, pull_value); + } +} + +int32_t SSDSparseTable::Push(TableContext& context) { return 0; } + int32_t SSDSparseTable::pull_sparse(float* pull_values, const PullSparseValue& pull_value) { auto shard_num = task_pool_size_; diff --git a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h index f5e8a7067e0e041f9913bef8e43ad8b35bdb2783..3a703d7d966d3e6026d13c0658f5979120cd2073 100644 --- a/paddle/fluid/distributed/ps/table/ssd_sparse_table.h +++ b/paddle/fluid/distributed/ps/table/ssd_sparse_table.h @@ -42,6 +42,9 @@ class SSDSparseTable : public CommonSparseTable { // exchange data virtual int32_t update_table(); + virtual int32_t Pull(TableContext& context); + virtual int32_t Push(TableContext& context); + virtual int32_t pull_sparse(float* values, const PullSparseValue& pull_value); virtual int32_t pull_sparse_ptr(char** pull_values, const uint64_t* keys, diff --git a/paddle/fluid/distributed/ps/table/table.h b/paddle/fluid/distributed/ps/table/table.h index da1bb668ccfa3c5f1a4f876a396847b6b3853772..2bd2a42b6c58f0753de86aa4e60ac7e0611bd7f7 100644 --- a/paddle/fluid/distributed/ps/table/table.h +++ 
b/paddle/fluid/distributed/ps/table/table.h @@ -32,6 +32,30 @@ namespace paddle { namespace distributed { + +enum ValueType { Sparse = 0, Dense = 1 }; + +struct PullContext { + const uint64_t *keys; + const PullSparseValue pull_value; + float *values; + char **ptr_values; +}; + +struct TablePushContext { + const uint64_t *keys; + const float *values; + const float **ptr_values; +}; + +struct TableContext { + ValueType value_type; + PullContext pull_context; + TablePushContext push_context; + size_t num; + bool use_ptr; +}; + class Table { public: Table() {} @@ -39,6 +63,8 @@ class Table { virtual int32_t initialize(const TableParameter &config, const FsClientParameter &fs_config); + virtual int32_t Pull(TableContext &context) = 0; + virtual int32_t Push(TableContext &context) = 0; virtual int32_t pull_dense(float *values, size_t num) = 0; virtual int32_t push_dense(const float *values, size_t num) = 0; // for push global_step diff --git a/paddle/fluid/distributed/ps/table/tensor_accessor.cc b/paddle/fluid/distributed/ps/table/tensor_accessor.cc index 70a580c1e53a931dc2affd29db01b72691c68a39..8c5349bff832caaa0a1b411723df8b3e9bcdcd4f 100644 --- a/paddle/fluid/distributed/ps/table/tensor_accessor.cc +++ b/paddle/fluid/distributed/ps/table/tensor_accessor.cc @@ -20,6 +20,16 @@ namespace distributed { int CommMergeAccessor::initialize() { return 0; } +void CommMergeAccessor::GetTableInfo(AccessorInfo &info) { + info.dim = dim(); + info.size = size(); + info.select_dim = select_dim(); + info.select_size = select_size(); + info.update_dim = update_dim(); + info.update_size = update_size(); + info.fea_dim = fea_dim(); +} + // value 维度 size_t CommMergeAccessor::dim() { return 0; } diff --git a/paddle/fluid/distributed/ps/table/tensor_accessor.h b/paddle/fluid/distributed/ps/table/tensor_accessor.h index 5041b8fdf8733eff676b5fce1a972e39182df48e..1873b743b44ec736f0470c3eff1f5b0280c235bf 100644 --- a/paddle/fluid/distributed/ps/table/tensor_accessor.h +++ b/paddle/fluid/distributed/ps/table/tensor_accessor.h @@ -30,6 +30,7 @@ class CommMergeAccessor : public ValueAccessor { CommMergeAccessor() {} virtual ~CommMergeAccessor() {} virtual int initialize(); + virtual void GetTableInfo(AccessorInfo &info); // value维度 virtual size_t dim(); // value各个维度的size diff --git a/paddle/fluid/distributed/ps/table/tensor_table.h b/paddle/fluid/distributed/ps/table/tensor_table.h index 64d81327acc55ba0655bfc33efaa0d9d9f59649e..23a62365c0f5a374f3820e2e790e6085cfda1c06 100644 --- a/paddle/fluid/distributed/ps/table/tensor_table.h +++ b/paddle/fluid/distributed/ps/table/tensor_table.h @@ -48,6 +48,8 @@ class TensorTable : public Table { TensorTable() {} virtual ~TensorTable() {} + virtual int32_t Pull(TableContext &context) { return 0; } + virtual int32_t Push(TableContext &context) { return 0; } int32_t pull_dense(float *values, size_t num) override { return 0; } int32_t push_dense(const float *values, size_t num) override { return 0; } diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.cc b/paddle/fluid/distributed/ps/wrapper/fleet.cc index 0588dbdf0fc61298d33eeb6db5b3de91a6de8256..c887cfeb71eef1c8b861b0d5958dca983e9feaaf 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.cc +++ b/paddle/fluid/distributed/ps/wrapper/fleet.cc @@ -30,6 +30,32 @@ bool FleetWrapper::is_initialized_ = false; std::shared_ptr FleetWrapper::pserver_ptr_ = NULL; +void FleetWrapper::Stop() { StopServer(); } + +void FleetWrapper::Load(WrapperContext& context) { + auto table_id = context.table_id; + if (table_id >= 0 && context.meta != "") 
{ +    LoadSparseOnServer(context.path, context.meta, context.table_id); +    return; +  } +  if (table_id < 0) {  // load all +    LoadModel(context.path, context.mode); +  } else {  // load one table +    LoadModelOneTable(table_id, context.path, context.mode); +  } +  return; +} + +void FleetWrapper::Save(WrapperContext& context) { +  auto table_id = context.table_id; +  if (table_id < 0) { +    SaveModel(context.path, context.mode); +  } else { +    SaveModelOneTable(table_id, context.path, context.mode); +  } +  return; +} + void FleetWrapper::SetClient2ClientConfig(int request_timeout_ms, int connect_timeout_ms, int max_retry) { diff --git a/paddle/fluid/distributed/ps/wrapper/fleet.h b/paddle/fluid/distributed/ps/wrapper/fleet.h index a535b8c5bf8f9bf72a2fa895b8a0fd82ffb2e0a3..d68c453c6d51b04131ce562cafddbbdb06ac0356 100644 --- a/paddle/fluid/distributed/ps/wrapper/fleet.h +++ b/paddle/fluid/distributed/ps/wrapper/fleet.h @@ -25,6 +25,7 @@ limitations under the License. */ #include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h" #include "paddle/fluid/distributed/ps/service/ps_service/service.h" +#include "paddle/fluid/distributed/ps/wrapper/ps_wrapper.h" #include "paddle/fluid/framework/archive.h" #include "paddle/fluid/framework/io/fs.h" #include "paddle/fluid/framework/io/shell.h" @@ -54,7 +55,7 @@ using framework::Variable; using RpcCtxMap = std::unordered_map; -class FleetWrapper { +class FleetWrapper : public PSWrapper { public: virtual ~FleetWrapper() {} FleetWrapper() { @@ -68,7 +69,13 @@ class FleetWrapper { // pserver request max retry client2client_max_retry_ = 3; } + virtual int32_t Initialize(InitContext& context) { return 0; } + virtual void Stop() override; + + virtual void Load(WrapperContext& context) override; + + virtual void Save(WrapperContext& context) override; // set client to client communication config void SetClient2ClientConfig(int request_timeout_ms, int connect_timeout_ms, int max_retry); diff --git a/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h b/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h index c92835aa995adfd3158fc344b490efb2c3133ec0..ca02ad31195ef2cdee649f5348d3f735c38097b8 100755 --- a/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h +++ b/paddle/fluid/distributed/ps/wrapper/ps_wrapper.h @@ -1,18 +1,84 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#ifndef PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_ -#define PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_ - -#endif  // PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + +    http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h" +#include "paddle/fluid/distributed/ps/service/ps_service/service.h" +#include "paddle/fluid/framework/archive.h" +#include "paddle/fluid/framework/io/fs.h" +#include "paddle/fluid/framework/io/shell.h" +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/framework/variable_helper.h" +#include "paddle/fluid/platform/macros.h"  // for DISABLE_COPY_AND_ASSIGN + +namespace paddle { +namespace framework { +class Scope; +class SelectedRows; +class Variable; +}  // namespace framework +}  // namespace paddle + +namespace paddle { +namespace distributed { + +class PSCore; + +using framework::LoDTensor; +using framework::Scope; +using phi::SelectedRows; +using framework::Variable; + +using RpcCtxMap = std::unordered_map; + +struct WrapperContext { +  uint32_t table_id; +  const std::string path; +  const int mode; +  const std::string meta; +}; + +struct InitContext { +  const std::vector dev_ids;  // for gpu +}; + +class PSWrapper { + public: +  virtual ~PSWrapper() {} +  PSWrapper() {} +  // init server + +  virtual int32_t Initialize(InitContext& context) = 0; + +  virtual void Stop() = 0; + +  virtual void Load(WrapperContext& context) = 0; + +  virtual void Save(WrapperContext& context) = 0; +}; + +}  // end namespace distributed +}  // end namespace paddle diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index b8d59e8dd8b4c60e28323955effd232eb2b51945..df2cdc35626a8aa27899f7340fa14285299a11d1 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -2032,7 +2032,15 @@ static std::string GenerateSingleOpBase( const char* ATTRS_TEMPLATE = "  auto& %s = this->attr_map_;\n"; std::string grad_attrs_str = paddle::string::Sprintf(ATTRS_TEMPLATE, attrs_name); - +  if (fwd_op_type == "cast") { +    // switch in/out dtype +    const char* CAST_GRAD = +        "  auto temp_type = %s[\"in_dtype\"];\n" +        "  %s[\"in_dtype\"] = %s[\"out_dtype\"];\n" +        "  %s[\"out_dtype\"] = temp_type;\n"; +    grad_attrs_str += paddle::string::Sprintf(CAST_GRAD, attrs_name, attrs_name, +                                              attrs_name, attrs_name); +  } // Handle dynamic grad attributes grad_attrs_str += HandleDynamicGradAttributes(fwd_op_type, attrs_name); generated_grad_function_body += grad_attrs_str; diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index 163d25e85ce8c085087331c6e3273075aed5e5f4..038ad09aa4d8bef1282c024559b60d0eed7e48d1 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -93,7 +93,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank, // Create new tensor->impl and fill it with 1.0 if (t.defined()) { // Fill 1.0 -      buffer_[slot_id][rank] = paddle::experimental::ones_like(t); +      buffer_[slot_id][rank] = paddle::experimental::ones_like(t,
t.dtype()); } } } diff --git a/paddle/fluid/operators/pad3d_op.cc b/paddle/fluid/operators/pad3d_op.cc index 7b9a4ab1557bf0ce0ed2bd348298373f0ba672cf..e4952a243262bedc5477908cd8aedeb158e344b8 100644 --- a/paddle/fluid/operators/pad3d_op.cc +++ b/paddle/fluid/operators/pad3d_op.cc @@ -16,7 +16,9 @@ limitations under the License. */ #include #include #include +#include "paddle/fluid/framework/infershape_utils.h" #include "paddle/fluid/framework/op_registry.h" +#include "paddle/phi/infermeta/unary.h" #include "paddle/phi/kernels/funcs/math_function.h" namespace paddle { @@ -24,734 +26,10 @@ namespace operators { using framework::Tensor; -template -void ConstPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, const int out_d, - const int out_h, const int out_w, const T value) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - out_data[out_d * out_height * out_width + out_h * out_width + out_w] = - (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width) - ? value - : in_data[in_d * in_height * in_width + in_h * in_width + in_w]; -} - -template -void ConstPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, const int out_h, - const int out_w, const T value) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - if (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width) { - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = value; - } - } else { - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } - } -} - -template -void ReflectPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, const int out_w, - const T value) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = std::max(in_d, -in_d); // reflect by 0 - in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth - in_h = std::max(in_h, -in_h); // reflect by 0 - in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height - in_w = std::max(in_w, -in_w); // reflect by 0 - in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width - - out_data[out_d * out_height * out_width + out_h * out_width + out_w] = - in_data[in_d * in_height * in_width + in_h * in_width + in_w]; -} - -template -void ReflectPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, const int out_h, - const int out_w, const T value) { - int 
in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = std::max(in_d, -in_d); - in_d = std::min(in_d, 2 * in_depth - in_d - 2); - in_h = std::max(in_h, -in_h); - in_h = std::min(in_h, 2 * in_height - in_h - 2); - in_w = std::max(in_w, -in_w); - in_w = std::min(in_w, 2 * in_width - in_w - 2); - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } -} - -template -void ReplicatePad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, const int out_w, - const T value) { - int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - - out_data[out_d * out_height * out_width + out_h * out_width + out_w] = - in_data[in_d * in_height * in_width + in_h * in_width + in_w]; -} - -template -void ReplicatePad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, - const int out_h, const int out_w, const T value) { - int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } -} - -template -void CircularPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, const int out_w, - const T value) { - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - out_data[out_d * out_height * out_width + out_h * out_width + out_w] = - in_data[in_d * in_height * in_width + in_h * in_width + in_w]; -} - -template -void CircularPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, - const int out_h, const int out_w, const T value) { - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * 
in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - out_data[out_index + c] = in_data[in_index + c]; - } -} - -template -void Pad3DNCDHW(const T* in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, const int out_width, - const int pad_front, const int pad_top, const int pad_left, - T value, T* out_data, - void (*pad_func)(const T*, T*, const int, const int, const int, - const int, const int, const int, const int, - const int, const int, const int, const int, - const int, const T)) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_d = 0; out_d < out_depth; ++out_d) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - pad_func(in_data, out_data, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, - pad_left, out_d, out_h, out_w, value); - } - } - } - in_data += in_depth * in_height * in_width; - out_data += out_depth * out_height * out_width; - } - } -} - -template -void Pad3DNDHWC(const T* in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, const int out_width, - const int pad_front, const int pad_top, const int pad_left, - T value, T* out_data, - void (*pad_func)(const T*, T*, const int, const int, const int, - const int, const int, const int, const int, - const int, const int, const int, const int, - const int, const int, const T)) { - for (int n = 0; n < num; ++n) { - for (int out_d = 0; out_d < out_depth; ++out_d) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - pad_func(in_data, out_data, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, - pad_left, out_d, out_h, out_w, value); - } - } - } - in_data += in_depth * in_height * in_width * channels; - out_data += out_depth * out_height * out_width * channels; - } -} - -template -void ConstPad3DGradNCDHW(T* d_in_data, const T* d_out_data, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, const int out_d, - const int out_h, const int out_w) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width)) { - d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] = - d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; - } -} - -template -void ConstPad3DGradNDHWC(T* d_in_data, const T* d_out_data, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, const int out_h, - const int out_w) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width)) { - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 
0; c < channels; ++c) { - d_in_data[in_index + c] = d_out_data[out_index + c]; - } - } -} - -template -void ReflectPad3DGradNCDHW(T* d_in_data, const T* d_out_data, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, const int out_h, - const int out_w) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = std::max(in_d, -in_d); // reflect by 0 - in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth - in_h = std::max(in_h, -in_h); // reflect by 0 - in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height - in_w = std::max(in_w, -in_w); // reflect by 0 - in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width - - d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += - d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; -} - -template -void ReflectPad3DGradNDHWC(T* d_in_data, const T* d_out_data, - const int channels, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, const int out_w) { - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = std::max(in_d, -in_d); - in_d = std::min(in_d, 2 * in_depth - in_d - 2); - in_h = std::max(in_h, -in_h); - in_h = std::min(in_h, 2 * in_height - in_h - 2); - in_w = std::max(in_w, -in_w); - in_w = std::min(in_w, 2 * in_width - in_w - 2); - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] += d_out_data[out_index + c]; - } -} - -template -void ReplicatePad3DGradNCDHW(T* d_in_data, const T* d_out_data, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, - const int out_h, const int out_w) { - int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - - d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += - d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; -} - -template -void ReplicatePad3DGradNDHWC(T* d_in_data, const T* d_out_data, - const int channels, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, - const int out_w) { - int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); - int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); - int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] += d_out_data[out_index + c]; - } -} - -template -void 
CircularPad3DGradNCDHW(T* d_in_data, const T* d_out_data, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const int out_d, - const int out_h, const int out_w) { - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += - d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; -} - -template -void CircularPad3DGradNDHWC(T* d_in_data, const T* d_out_data, - const int channels, const int in_depth, - const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const int out_d, const int out_h, const int out_w) { - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - const int out_index = - (out_d * out_height * out_width + out_h * out_width + out_w) * channels; - const int in_index = - (in_d * in_height * in_width + in_h * in_width + in_w) * channels; - for (int c = 0; c < channels; ++c) { - d_in_data[in_index + c] += d_out_data[out_index + c]; - } -} - -template -void Pad3DGradNCDHW(T* d_in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data, - void (*pad_func)(T*, const T*, const int, const int, - const int, const int, const int, const int, - const int, const int, const int, const int, - const int, const int)) { - for (int n = 0; n < num; ++n) { - for (int c = 0; c < channels; ++c) { - for (int out_d = 0; out_d < out_depth; ++out_d) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - pad_func(d_in_data, d_out_data, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, - pad_left, out_d, out_h, out_w); - } - } - } - d_in_data += in_depth * in_height * in_width; - d_out_data += out_depth * out_height * out_width; - } - } -} - -template -void Pad3DGradNDHWC(T* d_in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, - const int out_width, const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data, - void (*pad_func)(T*, const T*, const int, const int, - const int, const int, const int, const int, - const int, const int, const int, const int, - const int, const int, const int)) { - for (int n = 0; n < num; ++n) { - for (int out_d = 0; out_d < out_depth; ++out_d) { - for (int out_h = 0; out_h < out_height; ++out_h) { - for (int out_w = 0; out_w < out_width; ++out_w) { - pad_func(d_in_data, d_out_data, channels, in_depth, in_height, - in_width, out_depth, out_height, out_width, pad_front, - pad_top, pad_left, out_d, out_h, out_w); - } - } - } - d_in_data += in_depth * in_height * in_width * channels; - d_out_data += out_depth * out_height * out_width * channels; - } -} - -static inline std::vector GetPaddings( - const framework::ExecutionContext& context) { 
- std::vector paddings(6); - auto* paddings_t = context.Input("Paddings"); - if (paddings_t) { - auto paddings_data = paddings_t->data(); - std::memcpy(paddings.data(), paddings_data, paddings.size() * sizeof(int)); - } else { - auto pads = context.Attr>("paddings"); - std::copy(pads.begin(), pads.end(), paddings.data()); - } - return paddings; -} - -template -class Pad3dCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::vector pads = GetPaddings(context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - T value = static_cast(context.Attr("value")); - - auto* x = context.Input("X"); - auto in_dims = x->dims(); - const T* in_data = x->data(); - - auto* out = context.Output("Out"); - if (data_format == "NCDHW") { - out->Resize({in_dims[0], in_dims[1], in_dims[2] + pads[4] + pads[5], - in_dims[3] + pads[2] + pads[3], - in_dims[4] + pads[0] + pads[1]}); - } else { - out->Resize({in_dims[0], in_dims[1] + pads[4] + pads[5], - in_dims[2] + pads[2] + pads[3], - in_dims[3] + pads[0] + pads[1], in_dims[4]}); - } - auto out_dims = out->dims(); - T* out_data = out->mutable_data(context.GetPlace()); - - int channels = in_dims[1]; - int in_depth = in_dims[2]; - int in_height = in_dims[3]; - int in_width = in_dims[4]; - int out_depth = out_dims[2]; - int out_height = out_dims[3]; - int out_width = out_dims[4]; - if (data_format == "NDHWC") { - channels = in_dims[4]; - in_depth = in_dims[1]; - in_height = in_dims[2]; - in_width = in_dims[3]; - out_depth = out_dims[1]; - out_height = out_dims[2]; - out_width = out_dims[3]; - } - - if (mode == "reflect") { - PADDLE_ENFORCE_GT(in_depth, pads[4], - platform::errors::InvalidArgument( - "The depth of Input(X)'s dimension should be " - "greater than pad_front" - " in reflect mode" - ", but received depth(%d) and pad_front(%d).", - in_depth, pads[4])); - PADDLE_ENFORCE_GT(in_depth, pads[5], - platform::errors::InvalidArgument( - "The depth of Input(X)'s dimension should be " - "greater than pad_back" - " in reflect mode" - ", but received depth(%d) and pad_back(%d).", - in_depth, pads[5])); - - PADDLE_ENFORCE_GT(in_height, pads[2], - platform::errors::InvalidArgument( - "The height of Input(X)'s dimension should be " - "greater than pad_top" - " in reflect mode" - ", but received depth(%d) and pad_top(%d).", - in_height, pads[2])); - PADDLE_ENFORCE_GT(in_height, pads[3], - platform::errors::InvalidArgument( - "The height of Input(X)'s dimension should be " - "greater than pad_bottom" - " in reflect mode" - ", but received depth(%d) and pad_bottom(%d).", - in_height, pads[3])); - - PADDLE_ENFORCE_GT(in_width, pads[0], - platform::errors::InvalidArgument( - "The width of Input(X)'s dimension should be " - "greater than pad_left" - " in reflect mode" - ", but received depth(%d) and pad_left(%d).", - in_width, pads[0])); - PADDLE_ENFORCE_GT(in_width, pads[1], - platform::errors::InvalidArgument( - "The width of Input(X)'s dimension should be " - "greater than pad_right" - " in reflect mode" - ", but received depth(%d) and pad_right(%d).", - in_width, pads[1])); - } else if (mode == "circular" || mode == "replicate") { - PADDLE_ENFORCE_NE(in_depth * in_height * in_width, 0, - platform::errors::InvalidArgument( - "The input tensor size can not be 0 for circular " - "or replicate padding mode.")); - } - - const int pad_left = pads[0]; - const int pad_top = pads[2]; - const int pad_front = pads[4]; - const int num = in_dims[0]; - if (data_format == 
"NCDHW") { - std::map - func_map; - - func_map["reflect"] = ReflectPad3DFuncNCDHW; - func_map["replicate"] = ReplicatePad3DFuncNCDHW; - func_map["circular"] = CircularPad3DFuncNCDHW; - func_map["constant"] = ConstPad3DFuncNCDHW; - Pad3DNCDHW(in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - value, out_data, func_map[mode]); - } else { - std::map - func_map; - - func_map["reflect"] = ReflectPad3DFuncNDHWC; - func_map["replicate"] = ReplicatePad3DFuncNDHWC; - func_map["circular"] = CircularPad3DFuncNDHWC; - func_map["constant"] = ConstPad3DFuncNDHWC; - Pad3DNDHWC(in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - value, out_data, func_map[mode]); - } - } -}; - -template -class Pad3dGradCPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::vector pads = GetPaddings(context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); - auto d_in_dims = d_in->dims(); - auto d_out_dims = d_out->dims(); - const T* d_out_data = d_out->data(); - T* d_in_data = d_in->mutable_data(context.GetPlace()); - phi::funcs::SetConstant set_zero; - set_zero(context.template device_context(), - d_in, static_cast(0)); - const int pad_left = pads[0]; - const int pad_top = pads[2]; - const int pad_front = pads[4]; - const int num = d_in_dims[0]; - if (data_format == "NCDHW") { - const int channels = d_in_dims[1]; - const int in_depth = d_in_dims[2]; - const int in_height = d_in_dims[3]; - const int in_width = d_in_dims[4]; - const int out_depth = d_out_dims[2]; - const int out_height = d_out_dims[3]; - const int out_width = d_out_dims[4]; - - std::map - func_map; - - func_map["reflect"] = ReflectPad3DGradNCDHW; - func_map["replicate"] = ReplicatePad3DGradNCDHW; - func_map["circular"] = CircularPad3DGradNCDHW; - func_map["constant"] = ConstPad3DGradNCDHW; - - Pad3DGradNCDHW(d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, - pad_left, d_out_data, func_map[mode]); - } else { - const int channels = d_in_dims[4]; - const int in_depth = d_in_dims[1]; - const int in_height = d_in_dims[2]; - const int in_width = d_in_dims[3]; - const int out_depth = d_out_dims[1]; - const int out_height = d_out_dims[2]; - const int out_width = d_out_dims[3]; - - std::map - func_map; - - func_map["reflect"] = ReflectPad3DGradNDHWC; - func_map["replicate"] = ReplicatePad3DGradNDHWC; - func_map["circular"] = CircularPad3DGradNDHWC; - func_map["constant"] = ConstPad3DGradNDHWC; - - Pad3DGradNDHWC(d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, - pad_left, d_out_data, func_map[mode]); - } - } -}; - class Pad3dOp : public framework::OperatorWithKernel { public: using framework::OperatorWithKernel::OperatorWithKernel; - void InferShape(framework::InferShapeContext* ctx) const override { - OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad3d"); - OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Pad3d"); - - auto x_dim = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ(x_dim.size(), 5, - platform::errors::InvalidArgument( - "The size of Input(X)'s dimension should be equal to " - "5, but received %d. 
", - x_dim.size())); - - std::vector out_dims(x_dim.size()); - auto data_format = ctx->Attrs().Get("data_format"); - out_dims[0] = x_dim[0]; - if (ctx->HasInput("Paddings")) { - auto paddings_dim = ctx->GetInputDim("Paddings"); - PADDLE_ENFORCE_EQ(paddings_dim.size(), 1, - platform::errors::InvalidArgument( - "Size of Input(Paddings)'s dimension should be " - "equal to 1, but received %d.", - paddings_dim.size())); - if (ctx->IsRuntime()) { - PADDLE_ENFORCE_EQ(paddings_dim[0], 6, - platform::errors::InvalidArgument( - "Shape of Input(Paddings) should be equal to " - "[6], but received [%d].", - paddings_dim[0])); - } - out_dims[1] = x_dim[1]; - out_dims[2] = x_dim[2]; - out_dims[3] = x_dim[3]; - } else { - auto paddings = ctx->Attrs().Get>("paddings"); - PADDLE_ENFORCE_EQ( - paddings.size(), 6, - platform::errors::InvalidArgument( - "Size of paddings should be equal to 4, but received %d.", - static_cast(paddings.size()))); - if (data_format == "NCDHW") { - out_dims[1] = x_dim[1]; // channel - out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) - ? x_dim[2] - : (x_dim[2] + paddings[4] + paddings[5]); // depth - - out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0)) - ? x_dim[3] - : (x_dim[3] + paddings[2] + paddings[3]); // height - - out_dims[4] = ((!ctx->IsRuntime()) && (x_dim[4] < 0)) - ? x_dim[4] - : (x_dim[4] + paddings[0] + paddings[1]); // width - } else { // NDHWC - out_dims[4] = x_dim[4]; // channel - - out_dims[1] = ((!ctx->IsRuntime()) && (x_dim[1] < 0)) - ? x_dim[1] - : (x_dim[1] + paddings[4] + paddings[5]); // depth - out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0)) - ? x_dim[2] - : (x_dim[2] + paddings[2] + paddings[3]); // height - out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0)) - ? x_dim[3] - : (x_dim[3] + paddings[0] + paddings[1]); // width - } - } - - ctx->SetOutputDim("Out", phi::make_ddim(out_dims)); - ctx->ShareLoD("X", /*->*/ "Out"); - } - protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { @@ -921,15 +199,14 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(Pad3dOpGradNoNeedBufferVarsInferer, "X"); namespace ops = paddle::operators; +DECLARE_INFER_SHAPE_FUNCTOR(pad3d, Pad3dInferShapeFunctor, + PD_INFER_META(phi::Pad3dInferMeta)); + REGISTER_OPERATOR(pad3d, ops::Pad3dOp, ops::Pad3dOpMaker, ops::Pad3dOpGradMaker, - ops::Pad3dOpGradMaker); + ops::Pad3dOpGradMaker, + Pad3dInferShapeFunctor); REGISTER_OPERATOR(pad3d_grad, ops::Pad3dOpGrad, ops::Pad3dOpDoubleGradMaker, ops::Pad3dOpDoubleGradMaker, ops::Pad3dOpGradNoNeedBufferVarsInferer); -REGISTER_OP_CPU_KERNEL(pad3d, ops::Pad3dCPUKernel, - ops::Pad3dCPUKernel, ops::Pad3dCPUKernel, - ops::Pad3dCPUKernel); -REGISTER_OP_CPU_KERNEL(pad3d_grad, ops::Pad3dGradCPUKernel, - ops::Pad3dGradCPUKernel); diff --git a/paddle/fluid/operators/pad3d_op.cu b/paddle/fluid/operators/pad3d_op.cu deleted file mode 100644 index 9ab0eb9d445da9b1d0f64cf4f7a721026dab5476..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/pad3d_op.cu +++ /dev/null @@ -1,793 +0,0 @@ -/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/device/gpu/gpu_info.h" -#include "paddle/fluid/platform/device/gpu/gpu_primitives.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace paddle { -namespace operators { - -using platform::PADDLE_CUDA_NUM_THREADS; - -using framework::Tensor; - -template -__global__ void Pad3DConstNCDHW(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T value, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - out_data[index] = - (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width) - ? value - : in_data[nc * in_depth * in_height * in_width + - in_d * in_height * in_width + in_h * in_width + in_w]; - } -} - -template -__global__ void Pad3DConstNDHWC(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T value, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - const int in_d = out_d - pad_front; - const int in_h = out_h - pad_top; - const int in_w = out_w - pad_left; - - out_data[index] = - (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || - in_h >= in_height || in_w >= in_width) - ? 
value - : in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c]; - } -} - -template -__global__ void Pad3DReflectNCDHW(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = max(in_d, -in_d); // reflect by 0 - in_d = min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth - in_h = max(in_h, -in_h); // reflect by 0 - in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height - in_w = max(in_w, -in_w); // reflect by 0 - in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width - out_data[index] = - in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * - in_width + - in_w]; - } -} - -template -__global__ void Pad3DReflectNDHWC(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = max(in_d, -in_d); - in_d = min(in_d, 2 * in_depth - in_d - 2); - in_h = max(in_h, -in_h); - in_h = min(in_h, 2 * in_height - in_h - 2); - in_w = max(in_w, -in_w); - in_w = min(in_w, 2 * in_width - in_w - 2); - - out_data[index] = in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c]; - } -} - -template -__global__ void Pad3DReplicateNCDHW(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); - int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - - out_data[index] = - in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * - in_width + - in_w]; - } -} - -template -__global__ void Pad3DReplicateNDHWC(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - 
CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - - int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); - int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - - out_data[index] = in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c]; - } -} - -template -__global__ void Pad3DCircularNCDHW(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int nc = index / out_width; - - const int out_w = index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - out_data[index] = - in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * - in_width + - in_w]; - } -} - -template -__global__ void Pad3DCircularNDHWC(const int nthreads, const T* in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, T* out_data) { - CUDA_KERNEL_LOOP(index, nthreads) { - int n = index / channels; - const int c = index % channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - out_data[index] = in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c]; - } -} - -template -__global__ void Pad3DGradConstNCDHW(const int in_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data) { - CUDA_KERNEL_LOOP(in_index, in_size) { - const int in_w = in_index % in_width; - - int nc = in_index / in_width; - const int in_h = nc % in_height; - - nc /= in_height; - const int in_d = nc % in_depth; - - nc /= in_depth; - - const int out_d = in_d + pad_front; - const int out_h = in_h + pad_top; - const int out_w = in_w + pad_left; - d_in_data[in_index] = - d_out_data[nc * out_depth * out_height * out_width + - out_d * out_height * out_width + out_h * out_width + out_w]; - } -} - -template -__global__ void Pad3DGradConstNDHWC(const int in_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int 
out_width, - const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data) { - CUDA_KERNEL_LOOP(in_index, in_size) { - const int c = in_index % channels; - int n = in_index / channels; - - const int in_w = n % in_width; - n /= in_width; - - const int in_h = n % in_height; - n /= in_height; - - const int in_d = n % in_depth; - n /= in_depth; - - const int out_d = in_d + pad_front; - const int out_h = in_h + pad_top; - const int out_w = in_w + pad_left; - - d_in_data[in_index] = - d_out_data[n * out_depth * out_height * out_width * channels + - out_d * out_height * out_width * channels + - out_h * out_width * channels + out_w * channels + c]; - } -} - -template -__global__ void Pad3DGradReflectNCDHW(const int out_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - int nc = out_index / out_width; - const int out_w = out_index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = max(in_d, -in_d); - in_h = max(in_h, -in_h); - in_w = max(in_w, -in_w); - - in_d = min(in_d, 2 * in_depth - in_d - 2); - in_h = min(in_h, 2 * in_height - in_h - 2); - in_w = min(in_w, 2 * in_width - in_w - 2); - - platform::CudaAtomicAdd( - &d_in_data[nc * in_depth * in_height * in_width + - in_d * in_height * in_width + in_h * in_width + in_w], - d_out_data[out_index]); - } -} - -template -__global__ void Pad3DGradReflectNDHWC(const int out_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, const int out_width, - const int pad_front, const int pad_top, - const int pad_left, const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - const int c = out_index % channels; - int n = out_index / channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - - int in_d = out_d - pad_front; - int in_h = out_h - pad_top; - int in_w = out_w - pad_left; - - in_d = max(in_d, -in_d); - in_h = max(in_h, -in_h); - in_w = max(in_w, -in_w); - - in_d = min(in_d, in_depth * 2 - in_d - 2); - in_h = min(in_h, in_height * 2 - in_h - 2); - in_w = min(in_w, in_width * 2 - in_w - 2); - platform::CudaAtomicAdd( - &d_in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c], - d_out_data[out_index]); - } -} - -template -__global__ void Pad3DGradReplicateNCDHW( - const int out_size, T* d_in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, const int out_width, - const int pad_front, const int pad_top, const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - int nc = out_index / out_width; - const int out_w = out_index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); - const int in_h = min(in_height 
- 1, max(out_h - pad_top, 0)); - const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - - platform::CudaAtomicAdd( - &d_in_data[nc * in_depth * in_height * in_width + - in_d * in_height * in_width + in_h * in_width + in_w], - d_out_data[out_index]); - } -} - -template -__global__ void Pad3DGradReplicateNDHWC( - const int out_size, T* d_in_data, const int num, const int channels, - const int in_depth, const int in_height, const int in_width, - const int out_depth, const int out_height, const int out_width, - const int pad_front, const int pad_top, const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - const int c = out_index % channels; - int n = out_index / channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - - const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); - const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); - const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); - - platform::CudaAtomicAdd( - &d_in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c], - d_out_data[out_index]); - } -} - -template -__global__ void Pad3DGradCircularNCDHW(const int out_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - int nc = out_index / out_width; - const int out_w = out_index % out_width; - const int out_h = nc % out_height; - nc /= out_height; - const int out_d = nc % out_depth; - nc /= out_depth; - - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - platform::CudaAtomicAdd( - &d_in_data[nc * in_depth * in_height * in_width + - in_d * in_height * in_width + in_h * in_width + in_w], - d_out_data[out_index]); - } -} - -template -__global__ void Pad3DGradCircularNDHWC(const int out_size, T* d_in_data, - const int num, const int channels, - const int in_depth, const int in_height, - const int in_width, const int out_depth, - const int out_height, - const int out_width, const int pad_front, - const int pad_top, const int pad_left, - const T* d_out_data) { - CUDA_KERNEL_LOOP(out_index, out_size) { - const int c = out_index % channels; - int n = out_index / channels; - const int out_w = n % out_width; - n /= out_width; - const int out_h = n % out_height; - n /= out_height; - const int out_d = n % out_depth; - n /= out_depth; - - int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; - int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; - int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; - - platform::CudaAtomicAdd( - &d_in_data[n * in_depth * in_height * in_width * channels + - in_d * in_height * in_width * channels + - in_h * in_width * channels + in_w * channels + c], - d_out_data[out_index]); - } -} - -static inline std::vector GetPaddings( - const framework::ExecutionContext& context) { - std::vector paddings(6); - auto* paddings_data = context.Input("Paddings"); - if (paddings_data) { - Tensor pads; - framework::TensorCopySync(*paddings_data, 
platform::CPUPlace(), &pads); - auto pads_data = pads.data(); - std::memcpy(paddings.data(), pads_data, paddings.size() * sizeof(int)); - } else { - auto pads = context.Attr>("paddings"); - std::copy(pads.begin(), pads.end(), paddings.data()); - } - return paddings; -} - -template -class Pad3dCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::vector pads = GetPaddings(context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - T value = static_cast(context.Attr("value")); - - auto* x = context.Input("X"); - auto in_dims = x->dims(); - const T* in_data = x->data(); - auto* out = context.Output("Out"); - auto out_dims = out->dims(); - if (data_format == "NCDHW") { - out_dims[0] = in_dims[0]; - out_dims[1] = in_dims[1]; - out_dims[2] = in_dims[2] + pads[4] + pads[5]; - out_dims[3] = in_dims[3] + pads[2] + pads[3]; - out_dims[4] = in_dims[4] + pads[0] + pads[1]; - } else { - out_dims[0] = in_dims[0]; - out_dims[1] = in_dims[1] + pads[4] + pads[5]; - out_dims[2] = in_dims[2] + pads[2] + pads[3]; - out_dims[3] = in_dims[3] + pads[0] + pads[1]; - out_dims[4] = in_dims[4]; - } - T* out_data = out->mutable_data(out_dims, context.GetPlace()); - - int channels = in_dims[1]; - int in_depth = in_dims[2]; - int in_height = in_dims[3]; - int in_width = in_dims[4]; - int out_depth = out_dims[2]; - int out_height = out_dims[3]; - int out_width = out_dims[4]; - if (data_format == "NDHWC") { - channels = in_dims[4]; - in_depth = in_dims[1]; - in_height = in_dims[2]; - in_width = in_dims[3]; - out_depth = out_dims[1]; - out_height = out_dims[2]; - out_width = out_dims[3]; - } - - if (mode == "reflect") { - PADDLE_ENFORCE_GT(in_depth, pads[4], - platform::errors::InvalidArgument( - "The depth of Input(X)'s dimension should be " - "greater than pad_front" - " in reflect mode" - ", but received depth(%d) and pad_front(%d).", - in_depth, pads[4])); - PADDLE_ENFORCE_GT(in_depth, pads[5], - platform::errors::InvalidArgument( - "The depth of Input(X)'s dimension should be " - "greater than pad_back" - " in reflect mode" - ", but received depth(%d) and pad_back(%d).", - in_depth, pads[5])); - - PADDLE_ENFORCE_GT(in_height, pads[2], - platform::errors::InvalidArgument( - "The height of Input(X)'s dimension should be " - "greater than pad_top" - " in reflect mode" - ", but received depth(%d) and pad_top(%d).", - in_height, pads[2])); - PADDLE_ENFORCE_GT(in_height, pads[3], - platform::errors::InvalidArgument( - "The height of Input(X)'s dimension should be " - "greater than pad_bottom" - " in reflect mode" - ", but received depth(%d) and pad_bottom(%d).", - in_height, pads[3])); - - PADDLE_ENFORCE_GT(in_width, pads[0], - platform::errors::InvalidArgument( - "The width of Input(X)'s dimension should be " - "greater than pad_left" - " in reflect mode" - ", but received depth(%d) and pad_left(%d).", - in_width, pads[0])); - PADDLE_ENFORCE_GT(in_width, pads[1], - platform::errors::InvalidArgument( - "The width of Input(X)'s dimension should be " - "greater than pad_right" - " in reflect mode" - ", but received depth(%d) and pad_right(%d).", - in_width, pads[1])); - } else if (mode == "circular" || mode == "replicate") { - PADDLE_ENFORCE_NE(in_depth * in_height * in_width, 0, - platform::errors::InvalidArgument( - "The input tensor size can not be 0 for circular " - "or replicate padding mode.")); - } - - const int pad_left = pads[0]; - const int pad_top = pads[2]; - const int pad_front = pads[4]; - 
const int num = in_dims[0]; - - auto stream = context.cuda_device_context().stream(); - int block = PADDLE_CUDA_NUM_THREADS; - const int out_size = out->numel(); - int grid = (out_size + block - 1) / block; - - if (data_format == "NCDHW") { - if (mode == "reflect") { - Pad3DReflectNCDHW<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else if (mode == "replicate") { - Pad3DReplicateNCDHW<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else if (mode == "circular") { - Pad3DCircularNCDHW<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else { - Pad3DConstNCDHW<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - value, out_data); - } - } else { - if (mode == "reflect") { - Pad3DReflectNDHWC<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else if (mode == "replicate") { - Pad3DReplicateNDHWC<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else if (mode == "circular") { - Pad3DCircularNDHWC<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - out_data); - } else { - Pad3DConstNDHWC<<>>( - out_size, in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - value, out_data); - } - } - } -}; - -template -class Pad3dGradCUDAKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& context) const override { - std::vector pads = GetPaddings(context); - auto mode = context.Attr("mode"); - auto data_format = context.Attr("data_format"); - auto* d_out = context.Input(framework::GradVarName("Out")); - auto* d_in = context.Output(framework::GradVarName("X")); - auto d_in_dims = d_in->dims(); - auto d_out_dims = d_out->dims(); - const T* d_out_data = d_out->data(); - T* d_in_data = d_in->mutable_data(context.GetPlace()); - - phi::funcs::SetConstant set_zero; - set_zero(context.template device_context(), - d_in, static_cast(0)); - - const int pad_left = pads[0]; - const int pad_top = pads[2]; - const int pad_front = pads[4]; - - const int num = d_in_dims[0]; - - auto stream = context.cuda_device_context().stream(); - int block = PADDLE_CUDA_NUM_THREADS; - const int out_size = d_out->numel(); - const int in_size = d_in->numel(); - int grid = (out_size + block - 1) / block; - - if (data_format == "NCDHW") { - const int channels = d_in_dims[1]; - const int in_depth = d_in_dims[2]; - const int in_height = d_in_dims[3]; - const int in_width = d_in_dims[4]; - const int out_depth = d_out_dims[2]; - const int out_height = d_out_dims[3]; - const int out_width = d_out_dims[4]; - - if (mode == "reflect") { - Pad3DGradReflectNCDHW<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else if (mode == "replicate") { - Pad3DGradReplicateNCDHW<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - 
out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else if (mode == "circular") { - Pad3DGradCircularNCDHW<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else { - grid = (in_size + block - 1) / block; - Pad3DGradConstNCDHW<<>>( - in_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } - } else { - const int channels = d_in_dims[4]; - const int in_depth = d_in_dims[1]; - const int in_height = d_in_dims[2]; - const int in_width = d_in_dims[3]; - const int out_depth = d_out_dims[1]; - const int out_height = d_out_dims[2]; - const int out_width = d_out_dims[3]; - if (mode == "reflect") { - Pad3DGradReflectNDHWC<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else if (mode == "replicate") { - Pad3DGradReplicateNDHWC<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else if (mode == "circular") { - Pad3DGradCircularNDHWC<<>>( - out_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } else { - grid = (in_size + block - 1) / block; - Pad3DGradConstNDHWC<<>>( - in_size, d_in_data, num, channels, in_depth, in_height, in_width, - out_depth, out_height, out_width, pad_front, pad_top, pad_left, - d_out_data); - } - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_CUDA_KERNEL(pad3d, ops::Pad3dCUDAKernel, - ops::Pad3dCUDAKernel, - ops::Pad3dCUDAKernel, ops::Pad3dCUDAKernel, - ops::Pad3dCUDAKernel); -REGISTER_OP_CUDA_KERNEL(pad3d_grad, ops::Pad3dGradCUDAKernel, - ops::Pad3dGradCUDAKernel, - ops::Pad3dGradCUDAKernel); diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 01186922b51af83e3e12dcd1f6b48c52c8fb8aa8..24bb2c9e65cde6909d5f934eb41b0a363044dde9 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -31,6 +31,7 @@ limitations under the License. 
*/ #include "paddle/phi/common/data_type.h" #include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/dense_tensor.h" + namespace paddle { namespace pybind { @@ -62,10 +63,10 @@ int TensorDtype2NumpyDtype(phi::DataType dtype) { return pybind11::detail::npy_api::NPY_INT32_; case phi::DataType::INT64: return pybind11::detail::npy_api::NPY_INT64_; - case phi::DataType::FLOAT16: - return pybind11::detail::NPY_FLOAT16_; case phi::DataType::BFLOAT16: return pybind11::detail::NPY_UINT16_; + case phi::DataType::FLOAT16: + return pybind11::detail::NPY_FLOAT16_; case phi::DataType::FLOAT32: return pybind11::detail::npy_api::NPY_FLOAT_; case phi::DataType::FLOAT64: diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index bcbc8f5262ce33a5c994f5acb5c19406162109d3..7c5f38744f8923805d1e9b521c58813293cdce9b 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -877,6 +877,77 @@ void PadInferMeta(const MetaTensor& input, out->set_dtype(input.dtype()); } +void Pad3dInferMeta(const MetaTensor& x, + const ScalarArray& paddings_scalar_array, + const std::string& mode, + float value, + const std::string& data_format, + MetaTensor* out, + MetaConfig config) { + auto x_dim = x.dims(); + PADDLE_ENFORCE_EQ(x_dim.size(), + 5, + errors::InvalidArgument( + "The size of Input(X)'s dimension should be equal to " + "5, but received %d. ", + x_dim.size())); + + std::vector out_dims(x_dim.size()); + out_dims[0] = x_dim[0]; + if (paddings_scalar_array.FromTensor()) { + if (config.is_runtime) { + PADDLE_ENFORCE_EQ( + paddings_scalar_array.GetData().size(), + 6, + errors::InvalidArgument("Shape of Input(Paddings) should be equal to " + "[6], but received [%d].", + paddings_scalar_array.GetData().size())); + } + out_dims[1] = x_dim[1]; + out_dims[2] = x_dim[2]; + out_dims[3] = x_dim[3]; + } else { + auto paddings = paddings_scalar_array.GetData(); + + PADDLE_ENFORCE_EQ( + paddings.size(), + 6, + errors::InvalidArgument( + "Size of paddings should be equal to 6, but received %d.", + static_cast(paddings.size()))); + if (data_format == "NCDHW") { + out_dims[1] = x_dim[1]; // channel + out_dims[2] = ((!config.is_runtime) && (x_dim[2] < 0)) + ? x_dim[2] + : (x_dim[2] + paddings[4] + paddings[5]); // depth + + out_dims[3] = ((!config.is_runtime) && (x_dim[3] < 0)) + ? x_dim[3] + : (x_dim[3] + paddings[2] + paddings[3]); // height + + out_dims[4] = ((!config.is_runtime) && (x_dim[4] < 0)) + ? x_dim[4] + : (x_dim[4] + paddings[0] + paddings[1]); // width + } else { // NDHWC + out_dims[4] = x_dim[4]; // channel + + out_dims[1] = ((!config.is_runtime) && (x_dim[1] < 0)) + ? x_dim[1] + : (x_dim[1] + paddings[4] + paddings[5]); // depth + out_dims[2] = ((!config.is_runtime) && (x_dim[2] < 0)) + ? x_dim[2] + : (x_dim[2] + paddings[2] + paddings[3]); // height + out_dims[3] = ((!config.is_runtime) && (x_dim[3] < 0)) + ? 
x_dim[3]
+                      : (x_dim[3] + paddings[0] + paddings[1]);  // width
+    }
+  }
+
+  out->set_dims(phi::make_ddim(out_dims));
+  out->set_dtype(x.dtype());
+  out->share_lod(x);
+}
+
 void PixelShuffleInferMeta(const MetaTensor& x,
                            int upscale_factor,
                            const std::string& data_format,
diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h
index 1b4ff7c69ac36f06042f5dc3678018c57aa1b0a1..d84283a65c4d19445dce61e9cf8ee6f70a83905f 100644
--- a/paddle/phi/infermeta/unary.h
+++ b/paddle/phi/infermeta/unary.h
@@ -147,6 +147,14 @@ void PadInferMeta(const MetaTensor& input,
                   MetaTensor* out,
                   MetaConfig config = MetaConfig());
 
+void Pad3dInferMeta(const MetaTensor& x,
+                    const ScalarArray& paddings,
+                    const std::string& mode,
+                    float value,
+                    const std::string& data_format,
+                    MetaTensor* out,
+                    MetaConfig config = MetaConfig());
+
 void PixelShuffleInferMeta(const MetaTensor& x,
                            int upscale_factor,
                            const std::string& data_format,
diff --git a/paddle/phi/kernels/cpu/pad3d_grad_kernel.cc b/paddle/phi/kernels/cpu/pad3d_grad_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b1adb3e206da97918dc69ee4694de1be525b382e
--- /dev/null
+++ b/paddle/phi/kernels/cpu/pad3d_grad_kernel.cc
@@ -0,0 +1,480 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
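+
+// The grad kernel in this file mirrors the forward CPU kernel
+// (pad3d_kernel.cc): one per-element functor per padding mode ("constant",
+// "reflect", "replicate", "circular") and per layout (NCDHW / NDHWC), with
+// Pad3DGradNCDHW / Pad3DGradNDHWC walking the output grid and dispatching
+// through a function pointer picked from a func_map keyed by mode.
+//
+// Paddings are ordered {pad_left, pad_right, pad_top, pad_bottom,
+// pad_front, pad_back}. As an illustrative example with hypothetical
+// shapes: an NCDHW input of [2, 3, 4, 5, 6] padded with {1, 1, 2, 2, 3, 3}
+// gives an output of [2, 3, 4 + 3 + 3, 5 + 2 + 2, 6 + 1 + 1]
+// = [2, 3, 10, 9, 8], which is the relationship Pad3dInferMeta above
+// encodes.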
+ +#include "paddle/phi/kernels/pad3d_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +template +void ConstPad3DGradNCDHW(T* d_in_data, + const T* d_out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width)) { + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] = + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; + } +} + +template +void ConstPad3DGradNDHWC(T* d_in_data, + const T* d_out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width)) { + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] = d_out_data[out_index + c]; + } + } +} + +template +void ReflectPad3DGradNCDHW(T* d_in_data, + const T* d_out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); // reflect by 0 + in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = std::max(in_h, -in_h); // reflect by 0 + in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = std::max(in_w, -in_w); // reflect by 0 + in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void ReflectPad3DGradNDHWC(T* d_in_data, + const T* d_out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); + in_d = std::min(in_d, 2 * in_depth - in_d - 2); + in_h = std::max(in_h, -in_h); + in_h = std::min(in_h, 2 * in_height - in_h - 2); + in_w = std::max(in_w, -in_w); + in_w = std::min(in_w, 2 * in_width - in_w - 2); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * 
in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void ReplicatePad3DGradNCDHW(T* d_in_data, + const T* d_out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void ReplicatePad3DGradNDHWC(T* d_in_data, + const T* d_out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void CircularPad3DGradNCDHW(T* d_in_data, + const T* d_out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] += + d_out_data[out_d * out_height * out_width + out_h * out_width + out_w]; +} + +template +void CircularPad3DGradNDHWC(T* d_in_data, + const T* d_out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + d_in_data[in_index + c] += d_out_data[out_index + c]; + } +} + +template +void Pad3DGradNCDHW(T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int 
pad_left, + const T* d_out_data, + void (*pad_func)(T*, + const T*, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int)) { + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(d_in_data, + d_out_data, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_d, + out_h, + out_w); + } + } + } + d_in_data += in_depth * in_height * in_width; + d_out_data += out_depth * out_height * out_width; + } + } +} + +template +void Pad3DGradNDHWC(T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data, + void (*pad_func)(T*, + const T*, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int)) { + for (int n = 0; n < num; ++n) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(d_in_data, + d_out_data, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_d, + out_h, + out_w); + } + } + } + d_in_data += in_depth * in_height * in_width * channels; + d_out_data += out_depth * out_height * out_width * channels; + } +} + +template +void Pad3dGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* x_grad) { + std::vector pads = paddings.GetData(); + + auto* d_out = &out_grad; + auto* d_in = x_grad; + auto d_in_dims = d_in->dims(); + auto d_out_dims = d_out->dims(); + const T* d_out_data = d_out->data(); + T* d_in_data = dev_ctx.template Alloc(d_in); + phi::funcs::SetConstant()(dev_ctx, d_in, static_cast(0)); + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = d_in_dims[0]; + if (data_format == "NCDHW") { + const int channels = d_in_dims[1]; + const int in_depth = d_in_dims[2]; + const int in_height = d_in_dims[3]; + const int in_width = d_in_dims[4]; + const int out_depth = d_out_dims[2]; + const int out_height = d_out_dims[3]; + const int out_width = d_out_dims[4]; + + std::map + func_map; + + func_map["reflect"] = ReflectPad3DGradNCDHW; + func_map["replicate"] = ReplicatePad3DGradNCDHW; + func_map["circular"] = CircularPad3DGradNCDHW; + func_map["constant"] = ConstPad3DGradNCDHW; + + Pad3DGradNCDHW(d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data, + func_map[mode]); + } else { + const int channels = d_in_dims[4]; + const int in_depth = d_in_dims[1]; + const int in_height = d_in_dims[2]; + const int in_width = d_in_dims[3]; + const int out_depth = d_out_dims[1]; + const int out_height = d_out_dims[2]; + const int out_width = d_out_dims[3]; + + std::map + func_map; + + func_map["reflect"] = ReflectPad3DGradNDHWC; + 
func_map["replicate"] = ReplicatePad3DGradNDHWC; + func_map["circular"] = CircularPad3DGradNDHWC; + func_map["constant"] = ConstPad3DGradNDHWC; + + Pad3DGradNDHWC(d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data, + func_map[mode]); + } +} +} // namespace phi + +PD_REGISTER_KERNEL( + pad3d_grad, CPU, ALL_LAYOUT, phi::Pad3dGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/pad3d_kernel.cc b/paddle/phi/kernels/cpu/pad3d_kernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..0dc01f485f3aa9ba6ff0b6d089887ff04847054c --- /dev/null +++ b/paddle/phi/kernels/cpu/pad3d_kernel.cc @@ -0,0 +1,578 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/pad3d_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void ConstPad3DFuncNCDHW(const T* in_data, + T* out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? 
value + : in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ConstPad3DFuncNDHWC(const T* in_data, + T* out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + if (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) { + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = value; + } + } else { + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } + } +} + +template +void ReflectPad3DFuncNCDHW(const T* in_data, + T* out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); // reflect by 0 + in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = std::max(in_h, -in_h); // reflect by 0 + in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = std::max(in_w, -in_w); // reflect by 0 + in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ReflectPad3DFuncNDHWC(const T* in_data, + T* out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = std::max(in_d, -in_d); + in_d = std::min(in_d, 2 * in_depth - in_d - 2); + in_h = std::max(in_h, -in_h); + in_h = std::min(in_h, 2 * in_height - in_h - 2); + in_w = std::max(in_w, -in_w); + in_w = std::min(in_w, 2 * in_width - in_w - 2); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void ReplicatePad3DFuncNCDHW(const T* in_data, + T* out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + 
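+  // Replicate (edge) padding: the source index is clamped into
+  // [0, dim - 1], so each out-of-range output element copies the nearest
+  // border element of the input.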
out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void ReplicatePad3DFuncNDHWC(const T* in_data, + T* out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0)); + int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0)); + int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0)); + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void CircularPad3DFuncNCDHW(const T* in_data, + T* out_data, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[out_d * out_height * out_width + out_h * out_width + out_w] = + in_data[in_d * in_height * in_width + in_h * in_width + in_w]; +} + +template +void CircularPad3DFuncNDHWC(const T* in_data, + T* out_data, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const int out_d, + const int out_h, + const int out_w, + const T value) { + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + const int out_index = + (out_d * out_height * out_width + out_h * out_width + out_w) * channels; + const int in_index = + (in_d * in_height * in_width + in_h * in_width + in_w) * channels; + for (int c = 0; c < channels; ++c) { + out_data[out_index + c] = in_data[in_index + c]; + } +} + +template +void Pad3DNCDHW(const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T value, + T* out_data, + void (*pad_func)(const T*, + T*, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const T)) { + for (int n = 0; n < num; ++n) { + for (int c = 0; c < channels; ++c) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(in_data, + out_data, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_d, + out_h, + out_w, + value); + } + } + } + in_data += in_depth * 
in_height * in_width; + out_data += out_depth * out_height * out_width; + } + } +} + +template +void Pad3DNDHWC(const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T value, + T* out_data, + void (*pad_func)(const T*, + T*, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const int, + const T)) { + for (int n = 0; n < num; ++n) { + for (int out_d = 0; out_d < out_depth; ++out_d) { + for (int out_h = 0; out_h < out_height; ++out_h) { + for (int out_w = 0; out_w < out_width; ++out_w) { + pad_func(in_data, + out_data, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_d, + out_h, + out_w, + value); + } + } + } + in_data += in_depth * in_height * in_width * channels; + out_data += out_depth * out_height * out_width * channels; + } +} + +template +void Pad3dKernel(const Context& dev_ctx, + const DenseTensor& x, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* out) { + T value = static_cast(pad_value); + std::vector pads = paddings.GetData(); + + auto in_dims = x.dims(); + const T* in_data = x.data(); + + if (data_format == "NCDHW") { + out->Resize({in_dims[0], + in_dims[1], + in_dims[2] + pads[4] + pads[5], + in_dims[3] + pads[2] + pads[3], + in_dims[4] + pads[0] + pads[1]}); + } else { + out->Resize({in_dims[0], + in_dims[1] + pads[4] + pads[5], + in_dims[2] + pads[2] + pads[3], + in_dims[3] + pads[0] + pads[1], + in_dims[4]}); + } + + auto out_dims = out->dims(); + T* out_data = dev_ctx.template Alloc(out); + + int channels = in_dims[1]; + int in_depth = in_dims[2]; + int in_height = in_dims[3]; + int in_width = in_dims[4]; + int out_depth = out_dims[2]; + int out_height = out_dims[3]; + int out_width = out_dims[4]; + if (data_format == "NDHWC") { + channels = in_dims[4]; + in_depth = in_dims[1]; + in_height = in_dims[2]; + in_width = in_dims[3]; + out_depth = out_dims[1]; + out_height = out_dims[2]; + out_width = out_dims[3]; + } + + if (mode == "reflect") { + PADDLE_ENFORCE_GT( + in_depth, + pads[4], + errors::InvalidArgument("The depth of Input(X)'s dimension should be " + "greater than pad_front" + " in reflect mode" + ", but received depth(%d) and pad_front(%d).", + in_depth, + pads[4])); + PADDLE_ENFORCE_GT( + in_depth, + pads[5], + errors::InvalidArgument("The depth of Input(X)'s dimension should be " + "greater than pad_back" + " in reflect mode" + ", but received depth(%d) and pad_back(%d).", + in_depth, + pads[5])); + + PADDLE_ENFORCE_GT( + in_height, + pads[2], + errors::InvalidArgument("The height of Input(X)'s dimension should be " + "greater than pad_top" + " in reflect mode" + ", but received depth(%d) and pad_top(%d).", + in_height, + pads[2])); + PADDLE_ENFORCE_GT( + in_height, + pads[3], + errors::InvalidArgument("The height of Input(X)'s dimension should be " + "greater than pad_bottom" + " in reflect mode" + ", but received depth(%d) and pad_bottom(%d).", + in_height, + pads[3])); + + PADDLE_ENFORCE_GT( + in_width, + pads[0], + errors::InvalidArgument("The width of Input(X)'s dimension should be " + "greater than pad_left" + " in reflect mode" + ", but received depth(%d) and pad_left(%d).", + in_width, + 
pads[0])); + PADDLE_ENFORCE_GT( + in_width, + pads[1], + errors::InvalidArgument("The width of Input(X)'s dimension should be " + "greater than pad_right" + " in reflect mode" + ", but received depth(%d) and pad_right(%d).", + in_width, + pads[1])); + } else if (mode == "circular" || mode == "replicate") { + PADDLE_ENFORCE_NE(in_depth * in_height * in_width, + 0, + errors::InvalidArgument( + "The input tensor size can not be 0 for circular " + "or replicate padding mode.")); + } + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = in_dims[0]; + if (data_format == "NCDHW") { + std::map + func_map; + + func_map["reflect"] = ReflectPad3DFuncNCDHW; + func_map["replicate"] = ReplicatePad3DFuncNCDHW; + func_map["circular"] = CircularPad3DFuncNCDHW; + func_map["constant"] = ConstPad3DFuncNCDHW; + Pad3DNCDHW(in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + value, + out_data, + func_map[mode]); + } else { + std::map + func_map; + + func_map["reflect"] = ReflectPad3DFuncNDHWC; + func_map["replicate"] = ReplicatePad3DFuncNDHWC; + func_map["circular"] = CircularPad3DFuncNDHWC; + func_map["constant"] = ConstPad3DFuncNDHWC; + Pad3DNDHWC(in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + value, + out_data, + func_map[mode]); + } +} + +} // namespace phi + +PD_REGISTER_KERNEL( + pad3d, CPU, ALL_LAYOUT, phi::Pad3dKernel, float, double, int, int64_t) {} diff --git a/paddle/phi/kernels/gpu/pad3d_grad_kernel.cu b/paddle/phi/kernels/gpu/pad3d_grad_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..5ca8f3d73daded476052b77459bd68f2184ab290 --- /dev/null +++ b/paddle/phi/kernels/gpu/pad3d_grad_kernel.cu @@ -0,0 +1,507 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/pad3d_grad_kernel.h" + +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +using paddle::platform::PADDLE_CUDA_NUM_THREADS; + +template +__global__ void Pad3DGradConstNCDHW(const int in_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(in_index, in_size) { + const int in_w = in_index % in_width; + + int nc = in_index / in_width; + const int in_h = nc % in_height; + + nc /= in_height; + const int in_d = nc % in_depth; + + nc /= in_depth; + + const int out_d = in_d + pad_front; + const int out_h = in_h + pad_top; + const int out_w = in_w + pad_left; + d_in_data[in_index] = + d_out_data[nc * out_depth * out_height * out_width + + out_d * out_height * out_width + out_h * out_width + out_w]; + } +} + +template +__global__ void Pad3DGradConstNDHWC(const int in_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(in_index, in_size) { + const int c = in_index % channels; + int n = in_index / channels; + + const int in_w = n % in_width; + n /= in_width; + + const int in_h = n % in_height; + n /= in_height; + + const int in_d = n % in_depth; + n /= in_depth; + + const int out_d = in_d + pad_front; + const int out_h = in_h + pad_top; + const int out_w = in_w + pad_left; + + d_in_data[in_index] = + d_out_data[n * out_depth * out_height * out_width * channels + + out_d * out_height * out_width * channels + + out_h * out_width * channels + out_w * channels + c]; + } +} + +template +__global__ void Pad3DGradReflectNCDHW(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_h = max(in_h, -in_h); + in_w = max(in_w, -in_w); + + in_d = min(in_d, 2 * in_depth - in_d - 2); + in_h = min(in_h, 2 * in_height - in_h - 2); + in_w = min(in_w, 2 * in_width - in_w - 2); + + paddle::platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReflectNDHWC(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const 
int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_h = max(in_h, -in_h); + in_w = max(in_w, -in_w); + + in_d = min(in_d, in_depth * 2 - in_d - 2); + in_h = min(in_h, in_height * 2 - in_h - 2); + in_w = min(in_w, in_width * 2 - in_w - 2); + paddle::platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReplicateNCDHW(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + paddle::platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradReplicateNDHWC(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + const int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + const int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + const int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + paddle::platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradCircularNCDHW(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + int nc = out_index / out_width; + const int out_w = out_index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - 
pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + paddle::platform::CudaAtomicAdd( + &d_in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w], + d_out_data[out_index]); + } +} + +template +__global__ void Pad3DGradCircularNDHWC(const int out_size, + T* d_in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + const T* d_out_data) { + CUDA_KERNEL_LOOP(out_index, out_size) { + const int c = out_index % channels; + int n = out_index / channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + paddle::platform::CudaAtomicAdd( + &d_in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c], + d_out_data[out_index]); + } +} + +template +void Pad3dGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* x_grad) { + std::vector pads = paddings.GetData(); + auto* d_out = &out_grad; + auto* d_in = x_grad; + auto d_in_dims = d_in->dims(); + auto d_out_dims = d_out->dims(); + const T* d_out_data = d_out->data(); + T* d_in_data = dev_ctx.template Alloc(d_in); + + phi::funcs::SetConstant()(dev_ctx, d_in, static_cast(0)); + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + + const int num = d_in_dims[0]; + + auto stream = dev_ctx.stream(); + int block = PADDLE_CUDA_NUM_THREADS; + const int out_size = d_out->numel(); + const int in_size = d_in->numel(); + int grid = (out_size + block - 1) / block; + + if (data_format == "NCDHW") { + const int channels = d_in_dims[1]; + const int in_depth = d_in_dims[2]; + const int in_height = d_in_dims[3]; + const int in_width = d_in_dims[4]; + const int out_depth = d_out_dims[2]; + const int out_height = d_out_dims[3]; + const int out_width = d_out_dims[4]; + + if (mode == "reflect") { + Pad3DGradReflectNCDHW<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else if (mode == "replicate") { + Pad3DGradReplicateNCDHW<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else if (mode == "circular") { + Pad3DGradCircularNCDHW<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else { + grid = (in_size + block - 1) / block; + Pad3DGradConstNCDHW<<>>(in_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } + } else { + const int channels = d_in_dims[4]; + const int 
in_depth = d_in_dims[1]; + const int in_height = d_in_dims[2]; + const int in_width = d_in_dims[3]; + const int out_depth = d_out_dims[1]; + const int out_height = d_out_dims[2]; + const int out_width = d_out_dims[3]; + if (mode == "reflect") { + Pad3DGradReflectNDHWC<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else if (mode == "replicate") { + Pad3DGradReplicateNDHWC<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else if (mode == "circular") { + Pad3DGradCircularNDHWC<<>>(out_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } else { + grid = (in_size + block - 1) / block; + Pad3DGradConstNDHWC<<>>(in_size, + d_in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + d_out_data); + } + } +} + +} // namespace phi + +PD_REGISTER_KERNEL( + pad3d_grad, GPU, ALL_LAYOUT, phi::Pad3dGradKernel, float, double) {} diff --git a/paddle/phi/kernels/gpu/pad3d_kernel.cu b/paddle/phi/kernels/gpu/pad3d_kernel.cu new file mode 100644 index 0000000000000000000000000000000000000000..2cef77cc0eef96d910d1b4f8c1b0ba736034063a --- /dev/null +++ b/paddle/phi/kernels/gpu/pad3d_kernel.cu @@ -0,0 +1,588 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/pad3d_kernel.h" + +#include + +#include "paddle/fluid/platform/device/gpu/gpu_info.h" +#include "paddle/fluid/platform/device/gpu/gpu_primitives.h" +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +using paddle::platform::PADDLE_CUDA_NUM_THREADS; + +template +__global__ void Pad3DConstNCDHW(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T value, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + out_data[index] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? 
value + : in_data[nc * in_depth * in_height * in_width + + in_d * in_height * in_width + in_h * in_width + in_w]; + } +} + +template +__global__ void Pad3DConstNDHWC(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T value, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + const int in_d = out_d - pad_front; + const int in_h = out_h - pad_top; + const int in_w = out_w - pad_left; + + out_data[index] = + (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth || + in_h >= in_height || in_w >= in_width) + ? value + : in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DReflectNCDHW(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); // reflect by 0 + in_d = min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth + in_h = max(in_h, -in_h); // reflect by 0 + in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height + in_w = max(in_w, -in_w); // reflect by 0 + in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DReflectNDHWC(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + int in_d = out_d - pad_front; + int in_h = out_h - pad_top; + int in_w = out_w - pad_left; + + in_d = max(in_d, -in_d); + in_d = min(in_d, 2 * in_depth - in_d - 2); + in_h = max(in_h, -in_h); + in_h = min(in_h, 2 * in_height - in_h - 2); + in_w = max(in_w, -in_w); + in_w = min(in_w, 2 * in_width - in_w - 2); + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DReplicateNCDHW(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, 
+ const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DReplicateNDHWC(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = min(in_depth - 1, max(out_d - pad_front, 0)); + int in_h = min(in_height - 1, max(out_h - pad_top, 0)); + int in_w = min(in_width - 1, max(out_w - pad_left, 0)); + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +__global__ void Pad3DCircularNCDHW(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int nc = index / out_width; + + const int out_w = index % out_width; + const int out_h = nc % out_height; + nc /= out_height; + const int out_d = nc % out_depth; + nc /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[index] = + in_data[(nc * in_depth * in_height + in_d * in_height + in_h) * + in_width + + in_w]; + } +} + +template +__global__ void Pad3DCircularNDHWC(const int nthreads, + const T* in_data, + const int num, + const int channels, + const int in_depth, + const int in_height, + const int in_width, + const int out_depth, + const int out_height, + const int out_width, + const int pad_front, + const int pad_top, + const int pad_left, + T* out_data) { + CUDA_KERNEL_LOOP(index, nthreads) { + int n = index / channels; + const int c = index % channels; + const int out_w = n % out_width; + n /= out_width; + const int out_h = n % out_height; + n /= out_height; + const int out_d = n % out_depth; + n /= out_depth; + + int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth; + int in_h = ((out_h - pad_top) % in_height + in_height) % in_height; + int in_w = ((out_w - pad_left) % in_width + in_width) % in_width; + + out_data[index] = in_data[n * in_depth * in_height * in_width * channels + + in_d * in_height * in_width * channels + + in_h * in_width * channels + in_w * channels + c]; + } +} + +template +void 
Pad3dKernel(const Context& dev_ctx, + const DenseTensor& x, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* out) { + std::vector pads = paddings.GetData(); + + auto in_dims = x.dims(); + const T* in_data = x.data(); + auto out_dims = out->dims(); + T value = static_cast(pad_value); + + if (data_format == "NCDHW") { + out_dims[0] = in_dims[0]; + out_dims[1] = in_dims[1]; + out_dims[2] = in_dims[2] + pads[4] + pads[5]; + out_dims[3] = in_dims[3] + pads[2] + pads[3]; + out_dims[4] = in_dims[4] + pads[0] + pads[1]; + } else { + out_dims[0] = in_dims[0]; + out_dims[1] = in_dims[1] + pads[4] + pads[5]; + out_dims[2] = in_dims[2] + pads[2] + pads[3]; + out_dims[3] = in_dims[3] + pads[0] + pads[1]; + out_dims[4] = in_dims[4]; + } + out->Resize(out_dims); + T* out_data = dev_ctx.template Alloc(out); + + int channels = in_dims[1]; + int in_depth = in_dims[2]; + int in_height = in_dims[3]; + int in_width = in_dims[4]; + int out_depth = out_dims[2]; + int out_height = out_dims[3]; + int out_width = out_dims[4]; + if (data_format == "NDHWC") { + channels = in_dims[4]; + in_depth = in_dims[1]; + in_height = in_dims[2]; + in_width = in_dims[3]; + out_depth = out_dims[1]; + out_height = out_dims[2]; + out_width = out_dims[3]; + } + + if (mode == "reflect") { + PADDLE_ENFORCE_GT( + in_depth, + pads[4], + errors::InvalidArgument("The depth of Input(X)'s dimension should be " + "greater than pad_front" + " in reflect mode" + ", but received depth(%d) and pad_front(%d).", + in_depth, + pads[4])); + PADDLE_ENFORCE_GT( + in_depth, + pads[5], + errors::InvalidArgument("The depth of Input(X)'s dimension should be " + "greater than pad_back" + " in reflect mode" + ", but received depth(%d) and pad_back(%d).", + in_depth, + pads[5])); + + PADDLE_ENFORCE_GT( + in_height, + pads[2], + errors::InvalidArgument("The height of Input(X)'s dimension should be " + "greater than pad_top" + " in reflect mode" + ", but received depth(%d) and pad_top(%d).", + in_height, + pads[2])); + PADDLE_ENFORCE_GT( + in_height, + pads[3], + errors::InvalidArgument("The height of Input(X)'s dimension should be " + "greater than pad_bottom" + " in reflect mode" + ", but received depth(%d) and pad_bottom(%d).", + in_height, + pads[3])); + + PADDLE_ENFORCE_GT( + in_width, + pads[0], + errors::InvalidArgument("The width of Input(X)'s dimension should be " + "greater than pad_left" + " in reflect mode" + ", but received depth(%d) and pad_left(%d).", + in_width, + pads[0])); + PADDLE_ENFORCE_GT( + in_width, + pads[1], + errors::InvalidArgument("The width of Input(X)'s dimension should be " + "greater than pad_right" + " in reflect mode" + ", but received depth(%d) and pad_right(%d).", + in_width, + pads[1])); + } else if (mode == "circular" || mode == "replicate") { + PADDLE_ENFORCE_NE(in_depth * in_height * in_width, + 0, + errors::InvalidArgument( + "The input tensor size can not be 0 for circular " + "or replicate padding mode.")); + } + + const int pad_left = pads[0]; + const int pad_top = pads[2]; + const int pad_front = pads[4]; + const int num = in_dims[0]; + + auto stream = dev_ctx.stream(); + int block = PADDLE_CUDA_NUM_THREADS; + const int out_size = out->numel(); + int grid = (out_size + block - 1) / block; + + if (data_format == "NCDHW") { + if (mode == "reflect") { + Pad3DReflectNCDHW<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } 
else if (mode == "replicate") { + Pad3DReplicateNCDHW<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } else if (mode == "circular") { + Pad3DCircularNCDHW<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } else { + Pad3DConstNCDHW<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + value, + out_data); + } + } else { + if (mode == "reflect") { + Pad3DReflectNDHWC<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } else if (mode == "replicate") { + Pad3DReplicateNDHWC<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } else if (mode == "circular") { + Pad3DCircularNDHWC<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + out_data); + } else { + Pad3DConstNDHWC<<>>(out_size, + in_data, + num, + channels, + in_depth, + in_height, + in_width, + out_depth, + out_height, + out_width, + pad_front, + pad_top, + pad_left, + value, + out_data); + } + } +} + +} // namespace phi + +PD_REGISTER_KERNEL(pad3d, + GPU, + ALL_LAYOUT, + phi::Pad3dKernel, + phi::dtype::float16, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/pad3d_grad_kernel.h b/paddle/phi/kernels/pad3d_grad_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..38f1e5335e8c240058fb3b52a8ae59a0c438b61c --- /dev/null +++ b/paddle/phi/kernels/pad3d_grad_kernel.h @@ -0,0 +1,32 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void Pad3dGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* x_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/pad3d_kernel.h b/paddle/phi/kernels/pad3d_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..d8876c3e7bc74f6f03413f00279bfaa355907c6b --- /dev/null +++ b/paddle/phi/kernels/pad3d_kernel.h @@ -0,0 +1,31 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/phi/common/scalar_array.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void Pad3dKernel(const Context& dev_ctx, + const DenseTensor& x, + const ScalarArray& paddings, + const std::string& mode, + float pad_value, + const std::string& data_format, + DenseTensor* out); + +} // namespace phi diff --git a/paddle/phi/ops/compat/pad3d_sig.cc b/paddle/phi/ops/compat/pad3d_sig.cc new file mode 100644 index 0000000000000000000000000000000000000000..c43b98fa27e6baef55ad1dcbc11cb764ba9cb944 --- /dev/null +++ b/paddle/phi/ops/compat/pad3d_sig.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/core/compat/op_utils.h" + +namespace phi { + +KernelSignature Pad3dOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.HasInput("Paddings")) { + return KernelSignature( + "pad3d", {"X"}, {"Paddings", "mode", "value", "data_format"}, {"Out"}); + } + + return KernelSignature( + "pad3d", {"X"}, {"paddings", "mode", "value", "data_format"}, {"Out"}); +} + +KernelSignature Pad3dGradOpArgumentMapping(const ArgumentMappingContext& ctx) { + if (ctx.HasInput("Paddings")) { + return KernelSignature("pad3d_grad", + {"X", GradVarName("Out")}, + {"Paddings", "mode", "value", "data_format"}, + {GradVarName("X")}); + } + return KernelSignature("pad3d_grad", + {"X", GradVarName("Out")}, + {"paddings", "mode", "value", "data_format"}, + {GradVarName("X")}); +} + +} // namespace phi + +PD_REGISTER_ARG_MAPPING_FN(pad3d, phi::Pad3dOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(pad3d_grad, phi::Pad3dGradOpArgumentMapping); diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 9439982858530e1e81156be4b32ef2d91dc4a33a..b4c5a36d288b7ee0f6e771d72b21bd54d1e3d669 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -612,7 +612,7 @@ def grad(outputs, if no_grad_vars is None: no_grad_vars = [] - elif isinstance(no_grad_vars, core.VarBase): + elif isinstance(no_grad_vars, (core.VarBase, core.eager.Tensor)): no_grad_vars = [no_grad_vars] elif isinstance(no_grad_vars, core.eager.Tensor): no_grad_vars = [no_grad_vars] @@ -718,13 +718,13 @@ def to_variable(value, name=None, zero_copy=None, dtype=None): y.shape # [3L, 2L] """ - support_type = (list, tuple, np.ndarray, core.VarBase, framework.Variable, - core.Tensor, core.LoDTensor) + support_type = (list, tuple, np.ndarray, core.eager.Tensor, core.VarBase, + framework.Variable, core.Tensor, core.LoDTensor) if 
not isinstance(value, support_type): raise TypeError( "The type of 'value' in fluid.dygraph.to_variable must be %s, but received %s." % (support_type, type(value))) - if isinstance(value, (core.VarBase, framework.Variable)): + if isinstance(value, (core.eager.Tensor, core.VarBase, framework.Variable)): return value elif isinstance(value, (core.Tensor, core.LoDTensor)): return core.VarBase(value) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 2b67a2029727f6b8f917239094a1b906d5cd6a62..af30b2b2444b44f1b27e8f277eb380557255517d 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -28,6 +28,7 @@ from .math_op_patch import monkey_patch_math_varbase from .parallel import scale_loss from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE import paddle.utils.deprecated as deprecated +from paddle import _C_ops class TensorHookRemoveHelper(object): @@ -782,7 +783,7 @@ def monkey_patch_varbase(): @framework.dygraph_only def clone(self): - return _C_ops_.assign(self) + return _C_ops.assign(self) @framework.dygraph_only def value(self): diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index d0a94238a7aeb21f9d1baf8154cbe3b7f2b77a72..fb787215d910e9924622147b86c328af5e1994de 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -316,7 +316,8 @@ def _dygraph_not_support_(func): def _dygraph_only_(func): def __impl__(*args, **kwargs): - assert in_dygraph_mode( + assert ( + in_dygraph_mode() or _in_eager_mode() ), "We only support '%s()' in dynamic graph mode, please call 'paddle.disable_static()' to enter dynamic graph mode." % func.__name__ return func(*args, **kwargs) diff --git a/python/paddle/fluid/tests/test_detection.py b/python/paddle/fluid/tests/test_detection.py index 9348b0b50a1c08e7103dc3cc32169f4a6a40591c..c45045509201df89d6a07b8c0aadc7ef9130cf2f 100644 --- a/python/paddle/fluid/tests/test_detection.py +++ b/python/paddle/fluid/tests/test_detection.py @@ -886,6 +886,7 @@ class TestDistributeFpnProposals(LayerTest): refer_level=4, refer_scale=224, rois_num=rois_num_dy) + print(type(multi_rois_dy)) output_dy = multi_rois_dy + [restore_ind_dy] + rois_num_per_level_dy output_dy_np = [] for output in output_dy: @@ -973,4 +974,5 @@ class TestBoxDecoderAndAssign(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py index cc2e14d6d6c2ef237351e372c75ca7e700de3fbf..341ec852c52197f689870f0a6c45141ebe318301 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py +++ b/python/paddle/fluid/tests/unittests/distribution/test_distribution_expfamily.py @@ -50,3 +50,7 @@ class TestExponentialFamilyException(unittest.TestCase): def test_entropy_exception(self): with self.assertRaises(NotImplementedError): paddle.distribution.ExponentialFamily.entropy(self.dist) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/distribution/test_kl.py b/python/paddle/fluid/tests/unittests/distribution/test_kl.py index a1413722446e287688d7e120a3ef31ea67cc798b..55358380c8b23fdfd512b259aca06901d5623e38 100644 --- a/python/paddle/fluid/tests/unittests/distribution/test_kl.py +++ 
b/python/paddle/fluid/tests/unittests/distribution/test_kl.py @@ -112,3 +112,7 @@ class TestKLExpfamilyExpFamily(unittest.TestCase): kl._kl_expfamily_expfamily(self.p, self.q), rtol=config.RTOL.get(config.DEFAULT_DTYPE), atol=config.ATOL.get(config.DEFAULT_DTYPE)) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py index 737c085dde6acf5e3645b2127f42b1d8b5a7aa1d..34b6f6dc8e5453b42e10c45d5423c6e17d2d0506 100644 --- a/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py +++ b/python/paddle/fluid/tests/unittests/sequence/test_sequence_concat.py @@ -20,6 +20,7 @@ import sys sys.path.append("../") from op_test import OpTest +import paddle from paddle import fluid @@ -115,4 +116,5 @@ class TestSequenceConcatOpError(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_Tensor_type.py b/python/paddle/fluid/tests/unittests/test_Tensor_type.py index 59395b94279ea7ec4fe43221deede7e82be8f38e..f1427d29782b969d9571f79c9a7bc62bf4e77070 100644 --- a/python/paddle/fluid/tests/unittests/test_Tensor_type.py +++ b/python/paddle/fluid/tests/unittests/test_Tensor_type.py @@ -39,6 +39,7 @@ class TensorTypeTest(unittest.TestCase): tensorx = paddle.tensor.logic.Tensor(inx) typex_str = str(type(tensorx)) + expectx = "" self.assertEqual((typex_str == expectx), True) diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index ecac22553cbcda7cc2dae179603f407eddc8652a..d05c9a3c313bb634effd9280e3d9503142166ee4 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -1202,4 +1202,5 @@ class TestMultiTensorAdam(unittest.TestCase): if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index b440e745b1082e98a832ea076cc052cbc106eeab..789cfa82658f43d2adb148fe41fd2fb380e96fba 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -451,4 +451,5 @@ class TestLayerTo(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_create_parameter.py b/python/paddle/fluid/tests/unittests/test_create_parameter.py index 763fb64816c9c66055b3ead2886e4ba29e0406f7..199558acd4ef64f4d63c04920ba0b0e0295df96c 100644 --- a/python/paddle/fluid/tests/unittests/test_create_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_create_parameter.py @@ -18,6 +18,7 @@ import numpy as np import paddle.fluid as fluid from paddle.fluid import Program, program_guard from paddle.fluid import ParamAttr, initializer +import paddle class TestCreateParameterError(unittest.TestCase): @@ -50,4 +51,5 @@ class TestCreateParameterError(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_ctc_align.py b/python/paddle/fluid/tests/unittests/test_ctc_align.py index f5934debfd7b663b24a0949012ea2aa85e07ece8..ffc5bc184efc222d3adb57e158814c0f592b9405 100644 --- a/python/paddle/fluid/tests/unittests/test_ctc_align.py +++ b/python/paddle/fluid/tests/unittests/test_ctc_align.py @@ -20,6 +20,7 @@ import numpy as 
np from op_test import OpTest from test_softmax_op import stable_softmax import paddle.fluid as fluid +import paddle def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None): @@ -229,4 +230,5 @@ class BadInputTestCTCAlignr(unittest.TestCase): if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_diff_op.py b/python/paddle/fluid/tests/unittests/test_diff_op.py index 1ae780f488d2dc6bf37f88505a67723ea867dd94..4a96827bd7c3c56320a58261abe1824786164d10 100644 --- a/python/paddle/fluid/tests/unittests/test_diff_op.py +++ b/python/paddle/fluid/tests/unittests/test_diff_op.py @@ -211,4 +211,5 @@ class TestDiffOpPreAppendAxis(TestDiffOp): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py index f95546f15f0024ccd8b7cd8464f0a8eb70662d8d..27d82fcc8903be20a378a45e0f4f3b01aa3d3bb7 100644 --- a/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py +++ b/python/paddle/fluid/tests/unittests/test_dygraph_weight_norm.py @@ -190,4 +190,5 @@ class TestDygraphRemoveWeightNorm(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_exponential_op.py b/python/paddle/fluid/tests/unittests/test_exponential_op.py index ccbc0a1676302b4c29b524601930cc855847e0fc..7a3ae203be62d644f076ae9b6bc2bf5b8641ccdf 100644 --- a/python/paddle/fluid/tests/unittests/test_exponential_op.py +++ b/python/paddle/fluid/tests/unittests/test_exponential_op.py @@ -209,4 +209,5 @@ class TestExponentialAPI(unittest.TestCase): if __name__ == "__main__": + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fmin_op.py b/python/paddle/fluid/tests/unittests/test_fmin_op.py index 5cdf096be6708c47dd1f56dc97243be70c6d63d5..7231823c375324aa7bbf7d45db14b4457ca4a8dd 100644 --- a/python/paddle/fluid/tests/unittests/test_fmin_op.py +++ b/python/paddle/fluid/tests/unittests/test_fmin_op.py @@ -189,3 +189,8 @@ class TestElementwiseFmin2Op(OpTest): """test_check_grad_ingore_y""" self.check_grad( ['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y')) + + +if __name__ == "__main__": + paddle.enable_static() + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index bff10c9c4ca26d342a6849a0b23a490058d6b7f7..8dc822c69b2c5df34968fbcd39b8d8438700add2 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -1025,4 +1025,5 @@ class TestDiracInitializer3(TestDiracInitializer1): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_inner.py b/python/paddle/fluid/tests/unittests/test_inner.py index de9decd0b8961115b7ee2e6dac44bfb40fcc5c1f..ff9f15ebbfc8204de042d7731ed94035152f46eb 100644 --- a/python/paddle/fluid/tests/unittests/test_inner.py +++ b/python/paddle/fluid/tests/unittests/test_inner.py @@ -163,4 +163,5 @@ class TestMultiplyError(unittest.TestCase): if __name__ == '__main__': + paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_io_save_load.py b/python/paddle/fluid/tests/unittests/test_io_save_load.py index 89ca28510b9b929b1fe36e0c9883da020e71555c..83aadbf68d569f904d56abfcab91236bd637095b 100644 --- 
a/python/paddle/fluid/tests/unittests/test_io_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_io_save_load.py
@@ -88,4 +88,5 @@ class TestWhenTrainWithNoGrad(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_isclose_op.py b/python/paddle/fluid/tests/unittests/test_isclose_op.py
index aa39284d11349eed027a1a496ce6d8b2b5e92e3d..2bb58d7c5741f2655bdcbffecedf8762704c07f3 100644
--- a/python/paddle/fluid/tests/unittests/test_isclose_op.py
+++ b/python/paddle/fluid/tests/unittests/test_isclose_op.py
@@ -210,6 +210,9 @@ class TestIscloseOpFloat64(TestIscloseOp):
         self.atol = np.array([0]).astype("float64")
         self.equal_nan = False

+    def test_check_output(self):
+        self.check_output()
+

 class TestIscloseOpLargeDimInput(TestIscloseOp):
     def set_args(self):
@@ -222,4 +225,5 @@ class TestIscloseOpLargeDimInput(TestIscloseOp):


 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_log_softmax.py b/python/paddle/fluid/tests/unittests/test_log_softmax.py
index 16f954708d4d4149f46a18cfd48e35dfbe147153..423eeaf3ada45e7d04dca3512bdba0b067583222 100644
--- a/python/paddle/fluid/tests/unittests/test_log_softmax.py
+++ b/python/paddle/fluid/tests/unittests/test_log_softmax.py
@@ -175,4 +175,5 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):


 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
index 6d94144fc7788d0dc79cfb10f97667a257621a04..60dd4948f996e505f59d7e12b92569000843c528 100644
--- a/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
+++ b/python/paddle/fluid/tests/unittests/test_lr_scheduler.py
@@ -555,4 +555,5 @@ class TestLRScheduler(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_mean_iou.py b/python/paddle/fluid/tests/unittests/test_mean_iou.py
index e2e118ac9e3b46499055c2dd46755d5401d5abd5..4e89a9034a341777f09958d9709b64a12020ec28 100644
--- a/python/paddle/fluid/tests/unittests/test_mean_iou.py
+++ b/python/paddle/fluid/tests/unittests/test_mean_iou.py
@@ -19,6 +19,7 @@ import unittest
 import numpy as np
 from op_test import OpTest
 import paddle.fluid as fluid
+import paddle


 def compute_mean_iou(predictions, labels, num_classes, in_wrongs, in_corrects,
@@ -129,4 +130,5 @@ class TestMeanIOUOpError(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_minus_op.py b/python/paddle/fluid/tests/unittests/test_minus_op.py
index 54253b17b967871b03628023c5a9fdb339af1828..461ff6a9273cdb39c73901da3f77fca021335f0c 100644
--- a/python/paddle/fluid/tests/unittests/test_minus_op.py
+++ b/python/paddle/fluid/tests/unittests/test_minus_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from op_test import OpTest
+import paddle


 class TestMinusOp(OpTest):
@@ -36,4 +37,5 @@ class TestMinusOp(OpTest):


 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py
index 575bc653618a583e883783cd1fffe1db371eccff..ef912699455d1b4ea2aa2899f20d0e2e09634f77 100644
--- a/python/paddle/fluid/tests/unittests/test_norm_all.py
+++ b/python/paddle/fluid/tests/unittests/test_norm_all.py
@@ -588,4 +588,5 @@ class API_NormTest(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py
index 9e0cf6ddef2d619e4d3b32260f7ddf5f31186ae5..8945d35c131fd8de89e2a421bbbd4b16aa01c9d8 100644
--- a/python/paddle/fluid/tests/unittests/test_paddle_save_load.py
+++ b/python/paddle/fluid/tests/unittests/test_paddle_save_load.py
@@ -315,7 +315,9 @@ class TestSaveLoadAny(unittest.TestCase):
             paddle.save(tensor, path)
             t_dygraph = paddle.load(path)
             np_dygraph = paddle.load(path, return_numpy=True)
-            self.assertTrue(isinstance(t_dygraph, paddle.fluid.core.VarBase))
+            self.assertTrue(
+                isinstance(t_dygraph, (paddle.fluid.core.VarBase,
+                                       paddle.fluid.core.eager.Tensor)))
             self.assertTrue(np.array_equal(tensor.numpy(), np_dygraph))
             self.assertTrue(np.array_equal(tensor.numpy(), t_dygraph.numpy()))
             paddle.enable_static()
@@ -685,27 +687,34 @@ class TestSaveLoadAny(unittest.TestCase):
                         np.array(v), np.array(load_tensor2['k2'][k])))
             self.assertTrue(load_tensor2['epoch'] == 123)

-            self.assertTrue(isinstance(load_tensor3[0], fluid.core.VarBase))
+            self.assertTrue(
+                isinstance(load_tensor3[0], (fluid.core.VarBase,
+                                             fluid.core.eager.Tensor)))
             self.assertTrue(np.array_equal(load_tensor3[0].numpy(), obj3[0]))
-            self.assertTrue(isinstance(load_tensor3[1], fluid.core.VarBase))
+            self.assertTrue(
+                isinstance(load_tensor3[1], (fluid.core.VarBase,
+                                             fluid.core.eager.Tensor)))
             self.assertTrue(np.array_equal(load_tensor3[1].numpy(), obj3[1]))

             for k, v in state_dict.items():
                 self.assertTrue(
-                    isinstance(load_tensor3[2]["state_dict"][k],
-                               fluid.core.VarBase))
+                    isinstance(load_tensor3[2]["state_dict"][k], (
+                        fluid.core.VarBase, fluid.core.eager.Tensor)))
                 self.assertTrue(
                     np.array_equal(load_tensor3[2]["state_dict"][k].numpy(),
                                    np.array(v)))

             for k, v in state_dict.items():
                 self.assertTrue(
-                    isinstance(load_tensor3[2]["opt"][k], fluid.core.VarBase))
+                    isinstance(load_tensor3[2]["opt"][k], (
+                        fluid.core.VarBase, fluid.core.eager.Tensor)))
                 self.assertTrue(
                     np.array_equal(load_tensor3[2]["opt"][k].numpy(),
                                    np.array(v)))

-            self.assertTrue(isinstance(load_tensor4[0], fluid.core.VarBase))
+            self.assertTrue(
+                isinstance(load_tensor4[0], (fluid.core.VarBase,
+                                             fluid.core.eager.Tensor)))
             self.assertTrue(np.array_equal(load_tensor4[0].numpy(), obj4[0]))

             load_array1 = paddle.load(path1, return_numpy=True)
diff --git a/python/paddle/fluid/tests/unittests/test_renorm_op.py b/python/paddle/fluid/tests/unittests/test_renorm_op.py
index 3ea2002a9786fdd3f6c034e84176d0cae46ca591..e00a892cf7197bc94d85e9082651e26a4bb3bbb9 100644
--- a/python/paddle/fluid/tests/unittests/test_renorm_op.py
+++ b/python/paddle/fluid/tests/unittests/test_renorm_op.py
@@ -54,7 +54,7 @@ class TestRenormAPI(unittest.TestCase):
     def test_dygraph_api(self):
         self.input_data()
         # case axis none
-        with fluid.dygraph.guard():
+        with fluid.dygraph.guard(fluid.CPUPlace()):
             input = [[[2.0, 2, -2], [3, 0.3, 3]], [[2, -8, 2], [3.1, 3.7, 3]]]
             x = paddle.to_tensor(input, stop_gradient=False)
             y = paddle.renorm(x, 1.0, 2, 2.05)
@@ -94,4 +94,5 @@ class TestRenormAPI(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py
index ca324b4a8fd0581e7483c12321f54acaa1965f54..1bfc1b00aa8227e6ccaefcaf1044774ed1404f45 100644
--- a/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py
+++ b/python/paddle/fluid/tests/unittests/test_retinanet_detection_output.py
@@ -23,6 +23,7 @@ from test_multiclass_nms_op import iou
 from test_multiclass_nms_op import nms
 import paddle.fluid as fluid
 from paddle.fluid import Program, program_guard
+import paddle


 def multiclass_nms(prediction, class_num, keep_top_k, nms_threshold):
@@ -518,4 +519,5 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py
index 9a97f57aaae5f290b20e34242b1b43e5e352223d..74409c8671059673121d0a73ed85d2cad8e3d6f2 100644
--- a/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py
+++ b/python/paddle/fluid/tests/unittests/test_smooth_l1_loss.py
@@ -178,4 +178,5 @@ class SmoothL1Loss(unittest.TestCase):


 if __name__ == "__main__":
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_tile_op.py b/python/paddle/fluid/tests/unittests/test_tile_op.py
index a01cf590e0cc6cd136335e9a74720a2b460dbb53..6f3d99f80ce21d9c3de194a8fe4c03fdbc83416f 100644
--- a/python/paddle/fluid/tests/unittests/test_tile_op.py
+++ b/python/paddle/fluid/tests/unittests/test_tile_op.py
@@ -22,7 +22,7 @@ import paddle.fluid as fluid
 from paddle.fluid import compiler, Program, program_guard


-# Situation 1: repeat_times is a list (without tensor)
+#Situation 1: repeat_times is a list (without tensor)
 class TestTileOpRank1(OpTest):
     def setUp(self):
         self.op_type = "tile"
diff --git a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py
index dbd40c349bbc81d39b8a929ee5b3e7b81a083406..57a7f94bedce9fb3cd9981e6ae21f6d902fd04d9 100644
--- a/python/paddle/fluid/tests/unittests/test_var_base.py
+++ b/python/paddle/fluid/tests/unittests/test_var_base.py
@@ -1361,4 +1361,5 @@ class TestVarBaseCopyGradientFrom(unittest.TestCase):


 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py
index 15d5640b11fe501e0d9f83168c434f9f02d7877c..59e285c1200b88cadd2016421b1a8de70c7dad34 100644
--- a/python/paddle/hapi/model.py
+++ b/python/paddle/hapi/model.py
@@ -68,8 +68,9 @@ def to_list(value):


 def to_numpy(var):
-    assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
-    if isinstance(var, fluid.core.VarBase):
+    assert isinstance(var, (Variable, fluid.core.VarBase,
+                            fluid.core.eager.Tensor)), "not a variable"
+    if isinstance(var, (fluid.core.VarBase, fluid.core.eager.Tensor)):
         return var.numpy()
     t = global_scope().find_var(var.name).get_tensor()
     return np.array(t)
diff --git a/python/paddle/metric/metrics.py b/python/paddle/metric/metrics.py
index 3ff91aa077954510a8e38bdee06b03968796b0f0..ba8aecd5d03ed8f4603eb6d4ebbbd799b3fc7a54 100644
--- a/python/paddle/metric/metrics.py
+++ b/python/paddle/metric/metrics.py
@@ -282,7 +282,7 @@ class Accuracy(Metric):
         Return:
             Tensor: the accuracy of current step.
         """
-        if isinstance(correct, paddle.Tensor):
+        if isinstance(correct, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             correct = correct.numpy()
         num_samples = np.prod(np.array(correct.shape[:-1]))
         accs = []
@@ -410,12 +410,12 @@ class Precision(Metric):
                 the shape should keep the same as preds.
                 The data type is 'int32' or 'int64'.
         """
-        if isinstance(preds, paddle.Tensor):
+        if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             preds = preds.numpy()
         elif not _is_numpy_(preds):
             raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")

-        if isinstance(labels, paddle.Tensor):
+        if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             labels = labels.numpy()
         elif not _is_numpy_(labels):
             raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")
@@ -543,12 +543,12 @@ class Recall(Metric):
                 the shape should keep the same as preds.
                 Shape: [batch_size, 1], Dtype: 'int32' or 'int64'.
         """
-        if isinstance(preds, paddle.Tensor):
+        if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             preds = preds.numpy()
         elif not _is_numpy_(preds):
             raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")

-        if isinstance(labels, paddle.Tensor):
+        if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             labels = labels.numpy()
         elif not _is_numpy_(labels):
             raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")
@@ -698,12 +698,12 @@ class Auc(Metric):
                 (batch_size, 1), labels[i] is either o or 1,
                 representing the label of the instance i.
         """
-        if isinstance(labels, paddle.Tensor):
+        if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             labels = labels.numpy()
         elif not _is_numpy_(labels):
             raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")

-        if isinstance(preds, paddle.Tensor):
+        if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
             preds = preds.numpy()
         elif not _is_numpy_(preds):
             raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")
diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py
index c227d2e6cee54a4da05b72ecc031061376e9425f..3b1ed17a70d3bdc57c20f88c121e973265e32fcd 100755
--- a/python/paddle/tensor/logic.py
+++ b/python/paddle/tensor/logic.py
@@ -462,7 +462,7 @@ def is_tensor(x):
            print(check) #False

    """
-    return isinstance(x, Tensor)
+    return isinstance(x, (Tensor, paddle.fluid.core.eager.Tensor))


 def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True):
diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py
index e7cf8059ae5aa52fe1ba205d265a7f02e8be85c8..a4972061d420853b4790dce5df457d981127194c 100755
--- a/python/paddle/tensor/manipulation.py
+++ b/python/paddle/tensor/manipulation.py
@@ -264,6 +264,9 @@ def fill_diagonal_tensor(x, y, offset=0, dim1=0, dim2=1, name=None):

 setattr(core.VarBase, 'fill_diagonal_tensor', fill_diagonal_tensor)

+if core._in_eager_mode():
+    setattr(core.eager.Tensor, 'fill_diagonal_tensor', fill_diagonal_tensor)
+

 @dygraph_only
 def tolist(x):
@@ -890,12 +893,20 @@ def stack(x, axis=0, name=None):

            x1 = paddle.to_tensor([[1.0, 2.0]])
            x2 = paddle.to_tensor([[3.0, 4.0]])
            x3 = paddle.to_tensor([[5.0, 6.0]])
+
            out = paddle.stack([x1, x2, x3], axis=0)
            print(out.shape)  # [3, 1, 2]
            print(out)
            # [[[1., 2.]],
            #  [[3., 4.]],
            #  [[5., 6.]]]
+
+           out = paddle.stack([x1, x2, x3], axis=-2)
+           print(out.shape)  # [1, 3, 2]
+           print(out)
+           # [[[1., 2.],
+           #   [3., 4.],
+           #   [5., 6.]]]
    """
    return layers.stack(x, axis, name)
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 0b04c83c3d05790b3633463f17eba78847279431..e1dd5f5e61d96d54873800770a77d37ca36db8fe 100755
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -1337,7 +1337,7 @@ def renorm(x, p, axis, max_norm):
            raise ValueError("the axis:{} should not be less than -1 * length of input_shape:{}".format(axis,-1 * len(input_shape)))
        axis = axis + len(input_shape)
    if paddle.in_dynamic_mode():
-       out = core.ops.renorm(x, 'p',p, 'axis',axis, 'max_norm', max_norm)
+       out = _C_ops.renorm(x, 'p',p, 'axis',axis, 'max_norm', max_norm)
        return out

    inputs = {'X': x}
diff --git a/python/paddle/vision/transforms/transforms.py b/python/paddle/vision/transforms/transforms.py
index 1a3dbd68066a72384589ac24579e0540b5484a6e..9fd200bf0344d58d6a2705d768afffc7ce92dcc2 100644
--- a/python/paddle/vision/transforms/transforms.py
+++ b/python/paddle/vision/transforms/transforms.py
@@ -327,12 +327,17 @@ class ToTensor(BaseTransform):
            import paddle.vision.transforms as T
            import paddle.vision.transforms.functional as F

-           fake_img = Image.fromarray((np.random.rand(224, 224, 3) * 255.).astype(np.uint8))
+           fake_img = Image.fromarray((np.random.rand(4, 5, 3) * 255.).astype(np.uint8))

            transform = T.ToTensor()

            tensor = transform(fake_img)
-
+
+           print(tensor.shape)
+           # [3, 4, 5]
+
+           print(tensor.dtype)
+           # paddle.float32
    """

    def __init__(self, data_format='CHW', keys=None):