Commit 0c1d5408 authored by: P phlrain

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_some_yaml_config

......@@ -414,6 +414,16 @@ std::future<int32_t> BrpcPsClient::load(uint32_t table_id,
return send_cmd(table_id, PS_LOAD_ONE_TABLE, {epoch, mode});
}
std::future<int32_t> BrpcPsClient::Load(const LoadSaveContext &load_context) {
if (load_context.table_id < 0) {
return send_cmd(-1, PS_LOAD_ALL_TABLE,
{load_context.epoch, load_context.mode});
} else {
return send_cmd(load_context.table_id, PS_LOAD_ONE_TABLE,
{load_context.epoch, load_context.mode});
}
}
std::future<int32_t> BrpcPsClient::save(const std::string &epoch,
const std::string &mode) {
VLOG(1) << "BrpcPsClient::save path " << epoch;
......@@ -427,6 +437,19 @@ std::future<int32_t> BrpcPsClient::save(uint32_t table_id,
return send_save_cmd(table_id, PS_SAVE_ONE_TABLE, {epoch, mode});
}
std::future<int32_t> BrpcPsClient::Save(const LoadSaveContext &save_context) {
if (save_context.table_id < 0) {
VLOG(1) << "BrpcPsClient::save path " << save_context.epoch;
return send_save_cmd(-1, PS_SAVE_ALL_TABLE,
{save_context.epoch, save_context.mode});
} else {
VLOG(1) << "BrpcPsClient::save one table path " << save_context.epoch
<< " table_id " << save_context.table_id;
return send_save_cmd(save_context.table_id, PS_SAVE_ONE_TABLE,
{save_context.epoch, save_context.mode});
}
}
std::future<int32_t> BrpcPsClient::clear() {
return send_cmd(-1, PS_CLEAR_ALL_TABLE, {});
}
......@@ -505,6 +528,44 @@ std::future<int32_t> BrpcPsClient::barrier(size_t table_id,
return send_cmd(table_id, PS_BARRIER, {std::to_string(barrier_type)});
}
std::future<int32_t> BrpcPsClient::Pull(RequestContext &pull_context) {
  if (pull_context.value_type == Dense) {  // pull dense
    Region *dense_region =
        reinterpret_cast<Region *>(pull_context.dense_values);
    return pull_dense(dense_region, pull_context.num, pull_context.table);
  } else {  // pull sparse
    uint64_t *keys = reinterpret_cast<uint64_t *>(pull_context.keys);
    float **select_values =
        reinterpret_cast<float **>(pull_context.sparse_values);
    size_t table_id = pull_context.table;
    size_t num = pull_context.num;
    bool is_training = pull_context.is_training;
    if (pull_context.training_mode == Geo) {  // for geo
      return pull_sparse_param(select_values, table_id, keys, num,
                               is_training);
    } else {  // for async
      return pull_sparse(select_values, table_id, keys, num, is_training);
    }
  }
}
std::future<int32_t> BrpcPsClient::Push(RequestContext &push_context) {
  if (push_context.value_type == Dense) {  // push dense
    const Region *dense_region = push_context.push_context.push_dense_values;
    return push_dense(dense_region, push_context.num, push_context.table);
  } else {  // push sparse
    size_t table_id = push_context.table;
    size_t num = push_context.num;
    if (push_context.training_mode == Geo) {  // for geo
      // TODO(zhaocaibei): geo sparse push is not implemented yet; return a
      // ready future so callers do not hit a missing-return path.
      std::promise<int32_t> promise;
      promise.set_value(0);
      return promise.get_future();
    }
    // for async
    const uint64_t *keys = push_context.push_context.keys;
    const float **update_values = push_context.push_context.push_values;
    return push_sparse(table_id, keys, update_values, num);
  }
}
std::future<int32_t> BrpcPsClient::pull_geo_param(size_t table_id,
std::vector<float> *values,
std::vector<uint64_t> *keys,
......
......@@ -163,12 +163,17 @@ class BrpcPsClient : public PSClient {
std::future<int32_t> load(uint32_t table_id, const std::string &epoch,
const std::string &mode) override;
std::future<int32_t> Load(const LoadSaveContext &load_context) override;
std::future<int32_t> save(const std::string &epoch,
const std::string &mode) override;
std::future<int32_t> save(uint32_t table_id, const std::string &epoch,
const std::string &mode) override;
virtual std::future<int32_t> Save(
const LoadSaveContext &save_context) override;
std::future<int32_t> clear() override;
std::future<int32_t> clear(uint32_t table_id) override;
......@@ -199,6 +204,10 @@ class BrpcPsClient : public PSClient {
const uint64_t *keys,
size_t num, bool is_training);
virtual std::future<int32_t> Pull(RequestContext &pull_context) override;
virtual std::future<int32_t> Push(RequestContext &push_context) override;
virtual std::future<int32_t> print_table_stat(uint32_t table_id);
virtual std::future<int32_t> barrier(size_t table_id, uint32_t barrier_type);
......
......@@ -51,7 +51,7 @@ class BrpcPsServer : public PSServer {
_server.Join();
return 0;
}
virtual int32_t port();
int32_t port();
private:
virtual int32_t initialize();
......
......@@ -43,7 +43,7 @@ class GraphBrpcServer : public PSServer {
_server.Join();
return 0;
}
virtual int32_t port();
int32_t port();
std::condition_variable *export_cv() { return &cv_; }
......
......@@ -26,6 +26,7 @@
#include "paddle/fluid/distributed/ps/service/sendrecv.pb.h"
#include "paddle/fluid/distributed/ps/table/accessor.h"
#include "paddle/fluid/distributed/ps/table/graph/graph_node.h"
#include "paddle/fluid/distributed/ps/table/table.h"
#include "paddle/fluid/platform/timer.h"
namespace paddle {
......@@ -59,6 +60,41 @@ class PSClientClosure : public google::protobuf::Closure {
std::vector<std::shared_ptr<std::promise<int32_t>>> _promises;
};
struct LoadSaveContext {
int table_id;
std::string epoch;
std::string mode;
};
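As a hedged illustration only (not part of the patch): the context-based entry points wrap the existing table_id/epoch/mode overloads, so a full-table load can be issued as below, assuming `client` points at an already-initialized PSClient implementation such as BrpcPsClient; the path and mode values are placeholders.

// Hypothetical caller-side sketch.
LoadSaveContext load_ctx;
load_ctx.table_id = -1;             // a negative table_id selects PS_LOAD_ALL_TABLE
load_ctx.epoch = "/path/to/model";  // placeholder model path
load_ctx.mode = "0";                // placeholder load mode
std::future<int32_t> status = client->Load(load_ctx);
status.wait();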
enum TrainingMode { Async = 0, Sync = 1, Geo = 3 };
enum TrainingPhase { Init = 0, Train = 1, Save = 2 };
// enum ValueType {
// Sparse = 0,
// Dense = 1
// };
struct PushContext {
const uint64_t *keys;
const float **push_values;
const Region *push_dense_values;
};
struct RequestContext {
int table;
  TrainingMode training_mode;    // Async = 0, Sync = 1, Geo = 3
  TrainingPhase training_phase;  // Init = 0, Train = 1, Save = 2
  ValueType value_type;          // Sparse = 0, Dense = 1
void *keys;
void **sparse_values; // for sparse values
Region *dense_values; // for dense values
PushContext push_context;
size_t num;
bool is_training;
void *callback;
};
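A minimal sketch, assuming an initialized PSClient implementation `client` and hypothetical caller-side variables (`table_id`, `dense_regions`, `region_num`, `keys`, `grad_ptrs`, `key_num`): a dense pull and an async sparse push expressed through the unified Pull()/Push() entry points declared later in this header.

// Dense pull through the unified API (illustrative only).
RequestContext pull_ctx;
pull_ctx.value_type = Dense;
pull_ctx.training_mode = Async;
pull_ctx.table = table_id;
pull_ctx.dense_values = dense_regions;  // Region* buffer owned by the caller
pull_ctx.num = region_num;
pull_ctx.is_training = true;
auto pull_status = client->Pull(pull_ctx);

// Async sparse push through the same entry point.
RequestContext push_ctx;
push_ctx.value_type = Sparse;
push_ctx.training_mode = Async;
push_ctx.table = table_id;
push_ctx.push_context.keys = keys;              // const uint64_t*
push_ctx.push_context.push_values = grad_ptrs;  // const float**
push_ctx.num = key_num;
auto push_status = client->Push(push_ctx);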
class PSClient {
public:
PSClient() {}
......@@ -86,6 +122,9 @@ class PSClient {
  // load data for the specified table
virtual std::future<int32_t> load(uint32_t table_id, const std::string &epoch,
const std::string &mode) = 0;
  // load with options configured through the context
virtual std::future<int32_t> Load(const LoadSaveContext &load_context) = 0;
  // save all table data; depending on mode, value_accessor may apply different save conditions
virtual std::future<int32_t> save(const std::string &epoch,
const std::string &mode) = 0;
......@@ -93,6 +132,8 @@ class PSClient {
virtual std::future<int32_t> save(uint32_t table_id, const std::string &epoch,
const std::string &mode) = 0;
virtual std::future<int32_t> Save(const LoadSaveContext &save_context) = 0;
  // clear table data
virtual std::future<int32_t> clear() = 0;
virtual std::future<int32_t> clear(uint32_t table_id) = 0;
......@@ -107,6 +148,8 @@ class PSClient {
virtual std::future<int32_t> pull_dense(Region *regions, size_t region_num,
size_t table_id) = 0;  // reserved
virtual std::future<int32_t> Push(RequestContext &push_context) = 0;
  // first push dense parameters to the parameter server;
  // this is necessary because dense weights are initialized in the trainer
  // on cold start
......@@ -117,6 +160,9 @@ class PSClient {
virtual std::future<int32_t> push_dense(const Region *regions,
size_t region_num,
size_t table_id) = 0;
virtual std::future<int32_t> Pull(RequestContext &pull_context) = 0;
  // issue a pull request using keys; results are written into values
  // keys and values each contain num entries; each value occupies select_size bytes
  // the keys and values buffers must not be reused before the future completes
......
......@@ -56,6 +56,19 @@ int32_t PsLocalClient::initialize() {
return done();
}
std::future<int32_t> PsLocalClient::Load(const LoadSaveContext& load_context) {
if (load_context.table_id < 0) {
for (auto& it : _table_map) {
load(it.first, load_context.epoch, load_context.mode);
}
return done();
} else {
auto* table_ptr = table(load_context.table_id);
table_ptr->load(load_context.epoch, load_context.mode);
return done();
}
}
::std::future<int32_t> PsLocalClient::save(const std::string& epoch,
const std::string& mode) {
// TODO
......@@ -74,6 +87,21 @@ int32_t PsLocalClient::initialize() {
return done();
}
::std::future<int32_t> PsLocalClient::Save(
const LoadSaveContext& save_context) {
if (save_context.table_id < 0) {
for (auto& it : _table_map) {
save(it.first, save_context.epoch, save_context.mode);
}
return done();
} else {
auto* table_ptr = table(save_context.table_id);
table_ptr->flush();
table_ptr->save(save_context.epoch, save_context.mode);
return done();
}
}
::std::future<int32_t> PsLocalClient::clear() {
// TODO
return done();
......@@ -93,6 +121,51 @@ int32_t PsLocalClient::initialize() {
return done();
}
::std::future<int32_t> PsLocalClient::Pull(RequestContext& pull_context) {
  if (pull_context.value_type == Dense) {  // pull dense
    Region* dense_region = reinterpret_cast<Region*>(pull_context.dense_values);
    return pull_dense(dense_region, pull_context.num, pull_context.table);
  } else {  // pull sparse
    uint64_t* keys = reinterpret_cast<uint64_t*>(pull_context.keys);
    char** select_values = reinterpret_cast<char**>(pull_context.sparse_values);
    size_t table_id = pull_context.table;
    size_t num = pull_context.num;
    return pull_sparse_ptr(select_values, table_id, keys, num);
  }
}
::std::future<int32_t> PsLocalClient::Push(RequestContext& push_context) {
  if (push_context.value_type == Dense) {  // push dense
    if (push_context.training_phase == Init) {
      const Region* regions = push_context.push_context.push_dense_values;
      size_t region_num = push_context.num;
      return push_dense_param(regions, region_num, push_context.table);
    } else {
      if (push_context.training_mode == Geo) {  // geo
        float* total_send_data =
            reinterpret_cast<float*>(push_context.dense_values);
        size_t total_send_data_size = push_context.num;
        return push_dense_raw_gradient(push_context.table, total_send_data,
                                       total_send_data_size,
                                       push_context.callback);
      } else {  // async and sync
        const Region* regions = push_context.push_context.push_dense_values;
        size_t region_num = push_context.num;
        return push_dense(regions, region_num, push_context.table);
      }
    }
  } else {  // push sparse
    if (push_context.training_mode == Async) {
      const uint64_t* keys = push_context.push_context.keys;
      const float** update_values = push_context.push_context.push_values;
      size_t table_id = push_context.table;
      size_t num = push_context.num;
      return push_sparse(table_id, keys, update_values, num);
    }
    // TODO(zhaocaibei): other sparse push modes are not implemented yet
    return done();
  }
}
::std::future<int32_t> PsLocalClient::pull_dense(Region* regions,
size_t region_num,
size_t table_id) {
......
......@@ -39,12 +39,16 @@ class PsLocalClient : public PSClient {
virtual ::std::future<int32_t> load(uint32_t table_id,
const std::string& epoch,
const std::string& mode) override;
virtual std::future<int32_t> Load(
const LoadSaveContext& load_context) override;
virtual ::std::future<int32_t> save(const std::string& epoch,
const std::string& mode) override;
virtual ::std::future<int32_t> save(uint32_t table_id,
const std::string& epoch,
const std::string& mode) override;
virtual std::future<int32_t> Save(
const LoadSaveContext& save_context) override;
virtual ::std::future<int32_t> clear() override;
virtual ::std::future<int32_t> clear(uint32_t table_id) override;
......@@ -55,6 +59,10 @@ class PsLocalClient : public PSClient {
virtual ::std::future<int32_t> pull_dense(Region* regions, size_t region_num,
size_t table_id);
virtual ::std::future<int32_t> Pull(RequestContext& pull_context) override;
virtual ::std::future<int32_t> Push(RequestContext& push_context) override;
virtual ::std::future<int32_t> push_dense(const Region* regions,
size_t region_num, size_t table_id);
......
......@@ -28,7 +28,6 @@ class PsLocalServer : public PSServer {
virtual uint64_t start() { return 0; }
virtual uint64_t start(const std::string &ip, uint32_t port) { return 0; }
virtual int32_t stop() { return 0; }
virtual int32_t port() { return 0; }
virtual int32_t configure(
const PSParameter &config, PSEnvironment &env, size_t server_rank,
const std::vector<framework::ProgramDesc> &server_sub_program = {}) {
......
......@@ -67,8 +67,6 @@ int32_t PSServer::configure(
_config = config.server_param();
_rank = server_rank;
_environment = &env;
_shuffled_ins =
paddle::framework::MakeChannel<std::pair<uint64_t, std::string>>();
size_t shard_num = env.get_ps_servers().size();
const auto &downpour_param = _config.downpour_server_param();
......
......@@ -69,11 +69,6 @@ class PSServer {
const PSParameter &config, PSEnvironment &env, size_t server_rank,
const std::vector<framework::ProgramDesc> &server_sub_program = {});
// return server_ip
virtual std::string ip() { return butil::my_ip_cstr(); }
// return server_port
virtual int32_t port() = 0;
virtual uint64_t start(const std::string &ip, uint32_t port) = 0;
virtual int32_t stop() = 0;
......@@ -94,15 +89,6 @@ class PSServer {
return &_table_map;
}
typedef std::function<int32_t(int, int, const std::string &)> MsgHandlerFunc;
virtual int registe_pserver2pserver_msg_handler(int msg_type,
MsgHandlerFunc handler) {
_msg_handler_map[msg_type] = handler;
return 0;
}
paddle::framework::Channel<std::pair<uint64_t, std::string>> _shuffled_ins;
protected:
virtual int32_t initialize() = 0;
......@@ -111,7 +97,6 @@ class PSServer {
ServerParameter _config;
PSEnvironment *_environment;
std::unordered_map<uint32_t, std::shared_ptr<Table>> _table_map;
std::unordered_map<int32_t, MsgHandlerFunc> _msg_handler_map;
protected:
std::shared_ptr<framework::Scope> scope_;
......
......@@ -45,6 +45,17 @@ struct DataConverter {
std::string deconverter;
};
struct AccessorInfo {
size_t dim;
size_t size;
size_t select_size;
size_t select_dim;
size_t update_size;
size_t update_dim;
size_t mf_size;
size_t fea_dim;
};
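A hedged usage sketch (not from the patch): once a concrete ValueAccessor is configured, GetTableInfo fills this struct so callers can query the value layout in one place instead of calling dim()/size()/select_size() individually. `accessor` is a hypothetical pointer to an initialized accessor such as CtrCommonAccessor.

AccessorInfo info;
accessor->GetTableInfo(info);
size_t value_bytes = info.size;          // bytes per stored feature value
size_t select_bytes = info.select_size;  // bytes per pulled (selected) value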
class ValueAccessor {
public:
ValueAccessor() {}
......@@ -68,6 +79,8 @@ class ValueAccessor {
}
virtual int initialize() = 0;
virtual void GetTableInfo(AccessorInfo& info) = 0;
  // value dimension
  virtual size_t dim() = 0;
  // size of each value dimension
......@@ -163,6 +176,7 @@ class ValueAccessor {
TableAccessorParameter _config;
std::unordered_map<int, std::shared_ptr<struct DataConverter>>
_data_coverter_map;
AccessorInfo _accessor_info;
};
REGISTER_PSCORE_REGISTERER(ValueAccessor);
} // namespace distributed
......
......@@ -128,6 +128,21 @@ int32_t CommonDenseTable::set_global_lr(float* lr) {
return 0;
}
int32_t CommonDenseTable::Pull(TableContext& context) {
CHECK(context.value_type == Dense);
float* pull_values = context.pull_context.values;
return pull_dense(pull_values, context.num);
}
int32_t CommonDenseTable::Push(TableContext& context) {
CHECK(context.value_type == Dense);
  if (context.push_context.values != nullptr) {
const float* values = context.push_context.values;
return push_dense(values, context.num);
}
return 0;
}
int32_t CommonDenseTable::pull_dense(float* pull_values, size_t num) {
std::copy(values_[param_idx_].begin(), values_[param_idx_].end(),
pull_values);
......
......@@ -40,6 +40,8 @@ class CommonDenseTable : public DenseTable {
const std::string& name);
virtual int32_t initialize_value();
virtual int32_t initialize_optimizer();
virtual int32_t Pull(TableContext& context);
virtual int32_t Push(TableContext& context);
int32_t pull_dense(float* pull_values, size_t num) override;
int32_t push_dense_param(const float* values, size_t num) override;
int32_t push_dense(const float* values, size_t num) override;
......
......@@ -454,6 +454,9 @@ class GraphTable : public SparseTable {
int32_t get_server_index_by_id(int64_t id);
Node *find_node(int64_t id);
virtual int32_t Pull(TableContext &context) { return 0; }
virtual int32_t Push(TableContext &context) { return 0; }
virtual int32_t pull_sparse(float *values,
const PullSparseValue &pull_value) {
return 0;
......
......@@ -355,6 +355,32 @@ int32_t CommonSparseTable::pour() {
return 0;
}
int32_t CommonSparseTable::Pull(TableContext& context) {
CHECK(context.value_type == Sparse);
if (context.use_ptr) {
char** pull_values = context.pull_context.ptr_values;
const uint64_t* keys = context.pull_context.keys;
return pull_sparse_ptr(pull_values, keys, context.num);
} else {
float* pull_values = context.pull_context.values;
const PullSparseValue& pull_value = context.pull_context.pull_value;
return pull_sparse(pull_values, pull_value);
}
}
int32_t CommonSparseTable::Push(TableContext& context) {
CHECK(context.value_type == Sparse);
  if (context.push_context.values != nullptr) {
const float* values = context.push_context.values;
const uint64_t* keys = context.push_context.keys;
return push_sparse(keys, values, context.num);
} else {
const float** values = context.push_context.ptr_values;
const uint64_t* keys = context.push_context.keys;
return push_sparse(keys, values, context.num);
}
}
int32_t CommonSparseTable::pull_sparse(float* pull_values,
const PullSparseValue& pull_value) {
auto shard_num = task_pool_size_;
......
......@@ -121,6 +121,9 @@ class CommonSparseTable : public SparseTable {
virtual int32_t push_dense(const float* values, size_t num) { return 0; }
// unused method end
virtual int32_t Pull(TableContext& context);
virtual int32_t Push(TableContext& context);
virtual int32_t initialize();
virtual int32_t initialize_shard() { return 0; }
virtual int32_t initialize_value();
......
......@@ -119,6 +119,9 @@ class BarrierTable : public Table {
virtual void *get_shard(size_t shard_idx) { return 0; }
virtual int32_t Pull(TableContext &context) { return 0; }
virtual int32_t Push(TableContext &context) { return 0; }
int32_t pull_dense(float *values, size_t num) override { return 0; }
int32_t push_dense(const float *values, size_t num) override { return 0; }
......
......@@ -38,6 +38,16 @@ int CtrCommonAccessor::initialize() {
return 0;
}
void CtrCommonAccessor::GetTableInfo(AccessorInfo& info) {
info.dim = dim();
info.size = size();
info.select_dim = select_dim();
info.select_size = select_size();
info.update_dim = update_dim();
info.update_size = update_size();
info.fea_dim = fea_dim();
}
size_t CtrCommonAccessor::dim() { return common_feature_value.dim(); }
size_t CtrCommonAccessor::dim_size(size_t dim) {
......
......@@ -126,6 +126,7 @@ class CtrCommonAccessor : public ValueAccessor {
virtual int initialize();
virtual ~CtrCommonAccessor() {}
virtual void GetTableInfo(AccessorInfo& info);
  // value dimension
  virtual size_t dim();
  // size of each value dimension
......
......@@ -37,6 +37,16 @@ int DownpourCtrDoubleAccessor::initialize() {
return 0;
}
void DownpourCtrDoubleAccessor::GetTableInfo(AccessorInfo& info) {
info.dim = dim();
info.size = size();
info.select_dim = select_dim();
info.select_size = select_size();
info.update_dim = update_dim();
info.update_size = update_size();
info.fea_dim = fea_dim();
}
size_t DownpourCtrDoubleAccessor::dim() {
auto embedx_dim = _config.embedx_dim();
return DownpourCtrDoubleFeatureValue::dim(embedx_dim);
......
......@@ -168,6 +168,7 @@ class DownpourCtrDoubleAccessor : public ValueAccessor {
DownpourCtrDoubleAccessor() {}
virtual ~DownpourCtrDoubleAccessor() {}
virtual int initialize();
virtual void GetTableInfo(AccessorInfo& info);
  // value dimension
  virtual size_t dim();
  // size of each value dimension
......
......@@ -58,7 +58,7 @@ struct PullSparseValue {
std::vector<int>* offset_shard) const {
offset_shard->reserve(numel_ / shard_num + 1);
for (int x = 0; x < numel_; ++x) {
if (feasigns_[x] % shard_num == shard_id) {
      if (static_cast<int>(feasigns_[x] % shard_num) == shard_id) {
offset_shard->push_back(x);
}
}
......
......@@ -37,6 +37,16 @@ int DownpourCtrAccessor::initialize() {
return 0;
}
void DownpourCtrAccessor::GetTableInfo(AccessorInfo& info) {
info.dim = dim();
info.size = size();
info.select_dim = select_dim();
info.select_size = select_size();
info.update_dim = update_dim();
info.update_size = update_size();
info.fea_dim = fea_dim();
}
size_t DownpourCtrAccessor::dim() {
auto embedx_dim = _config.embedx_dim();
return DownpourCtrFeatureValue::dim(embedx_dim);
......
......@@ -160,6 +160,7 @@ class DownpourCtrAccessor : public ValueAccessor {
virtual ~DownpourCtrAccessor() {}
virtual int initialize();
virtual void GetTableInfo(AccessorInfo& info);
  // value dimension
  virtual size_t dim();
  // size of each value dimension
......
......@@ -48,6 +48,8 @@ class MemorySparseGeoTable : public SparseTable {
virtual int32_t save(const std::string& path, const std::string& param) {
return 0;
}
virtual int32_t Pull(TableContext& context) { return 0; }
virtual int32_t Push(TableContext& context) { return 0; }
virtual int32_t flush() { return 0; }
virtual int32_t shrink(const std::string& param) { return 0; }
virtual void clear() { return; }
......
......@@ -390,6 +390,26 @@ std::pair<int64_t, int64_t> MemorySparseTable::print_table_stat() {
return {feasign_size, mf_size};
}
int32_t MemorySparseTable::Pull(TableContext& context) {
CHECK(context.value_type == Sparse);
if (context.use_ptr) {
char** pull_values = context.pull_context.ptr_values;
const uint64_t* keys = context.pull_context.keys;
return pull_sparse_ptr(pull_values, keys, context.num);
} else {
float* pull_values = context.pull_context.values;
const PullSparseValue& pull_value = context.pull_context.pull_value;
return pull_sparse(pull_values, pull_value);
}
}
int32_t MemorySparseTable::Push(TableContext& context) {
CHECK(context.value_type == Sparse);
const uint64_t* keys = context.push_context.keys;
return push_sparse(keys, context.push_context.ptr_values, context.num);
}
int32_t MemorySparseTable::pull_sparse(float* pull_values,
const PullSparseValue& pull_value) {
CostTimer timer("pserver_sparse_select_all");
......
......@@ -48,6 +48,9 @@ class MemorySparseTable : public SparseTable {
virtual int32_t push_dense(const float* values, size_t num) { return 0; }
// unused method end
virtual int32_t Pull(TableContext& context);
virtual int32_t Push(TableContext& context);
virtual int32_t initialize();
virtual int32_t initialize_shard() { return 0; }
virtual int32_t initialize_value();
......
......@@ -61,6 +61,21 @@ int32_t SSDSparseTable::initialize() {
return 0;
}
int32_t SSDSparseTable::Pull(TableContext& context) {
CHECK(context.value_type == Sparse);
if (context.use_ptr) {
char** pull_values = context.pull_context.ptr_values;
const uint64_t* keys = context.pull_context.keys;
return pull_sparse_ptr(pull_values, keys, context.num);
} else {
float* pull_values = context.pull_context.values;
const PullSparseValue& pull_value = context.pull_context.pull_value;
return pull_sparse(pull_values, pull_value);
}
}
int32_t SSDSparseTable::Push(TableContext& context) { return 0; }
int32_t SSDSparseTable::pull_sparse(float* pull_values,
const PullSparseValue& pull_value) {
auto shard_num = task_pool_size_;
......
......@@ -42,6 +42,9 @@ class SSDSparseTable : public CommonSparseTable {
// exchange data
virtual int32_t update_table();
virtual int32_t Pull(TableContext& context);
virtual int32_t Push(TableContext& context);
virtual int32_t pull_sparse(float* values, const PullSparseValue& pull_value);
virtual int32_t pull_sparse_ptr(char** pull_values, const uint64_t* keys,
......
......@@ -32,6 +32,30 @@
namespace paddle {
namespace distributed {
enum ValueType { Sparse = 0, Dense = 1 };
struct PullContext {
const uint64_t *keys;
const PullSparseValue pull_value;
float *values;
char **ptr_values;
};
struct TablePushContext {
const uint64_t *keys;
const float *values;
const float **ptr_values;
};
struct TableContext {
ValueType value_type;
PullContext pull_context;
TablePushContext push_context;
size_t num;
bool use_ptr;
};
class Table {
public:
Table() {}
......@@ -39,6 +63,8 @@ class Table {
virtual int32_t initialize(const TableParameter &config,
const FsClientParameter &fs_config);
virtual int32_t Pull(TableContext &context) = 0;
virtual int32_t Push(TableContext &context) = 0;
virtual int32_t pull_dense(float *values, size_t num) = 0;
virtual int32_t push_dense(const float *values, size_t num) = 0;
// for push global_step
......
......@@ -20,6 +20,16 @@ namespace distributed {
int CommMergeAccessor::initialize() { return 0; }
void CommMergeAccessor::GetTableInfo(AccessorInfo &info) {
info.dim = dim();
info.size = size();
info.select_dim = select_dim();
info.select_size = select_size();
info.update_dim = update_dim();
info.update_size = update_size();
info.fea_dim = fea_dim();
}
// value dimension
size_t CommMergeAccessor::dim() { return 0; }
......
......@@ -30,6 +30,7 @@ class CommMergeAccessor : public ValueAccessor {
CommMergeAccessor() {}
virtual ~CommMergeAccessor() {}
virtual int initialize();
virtual void GetTableInfo(AccessorInfo &info);
  // value dimension
  virtual size_t dim();
  // size of each value dimension
......
......@@ -48,6 +48,8 @@ class TensorTable : public Table {
TensorTable() {}
virtual ~TensorTable() {}
virtual int32_t Pull(TableContext &context) { return 0; }
virtual int32_t Push(TableContext &context) { return 0; }
int32_t pull_dense(float *values, size_t num) override { return 0; }
int32_t push_dense(const float *values, size_t num) override { return 0; }
......
......@@ -30,6 +30,32 @@ bool FleetWrapper::is_initialized_ = false;
std::shared_ptr<paddle::distributed::PSCore> FleetWrapper::pserver_ptr_ = NULL;
void FleetWrapper::Stop() { StopServer(); }
void FleetWrapper::Load(WrapperContext& context) {
  int table_id = static_cast<int>(context.table_id);  // signed so the "all tables" (-1) case below is reachable
if (table_id >= 0 && context.meta != "") {
LoadSparseOnServer(context.path, context.meta, context.table_id);
return;
}
  if (table_id < 0) {  // load all
LoadModel(context.path, context.mode);
} else { // load one table
LoadModelOneTable(table_id, context.path, context.mode);
}
return;
}
void FleetWrapper::Save(WrapperContext& context) {
auto table_id = context.table_id;
if (table_id < 0) {
SaveModel(context.path, context.mode);
} else {
SaveModelOneTable(table_id, context.path, context.mode);
}
return;
}
void FleetWrapper::SetClient2ClientConfig(int request_timeout_ms,
int connect_timeout_ms,
int max_retry) {
......
......@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h"
#include "paddle/fluid/distributed/ps/service/ps_service/service.h"
#include "paddle/fluid/distributed/ps/wrapper/ps_wrapper.h"
#include "paddle/fluid/framework/archive.h"
#include "paddle/fluid/framework/io/fs.h"
#include "paddle/fluid/framework/io/shell.h"
......@@ -54,7 +55,7 @@ using framework::Variable;
using RpcCtxMap = std::unordered_map<std::string, CommContext>;
class FleetWrapper {
class FleetWrapper : public PSWrapper {
public:
virtual ~FleetWrapper() {}
FleetWrapper() {
......@@ -68,7 +69,13 @@ class FleetWrapper {
// pserver request max retry
client2client_max_retry_ = 3;
}
virtual int32_t Initialize(InitContext& context) { return 0; }
virtual void Stop() override;
virtual void Load(WrapperContext& context) override;
virtual void Save(WrapperContext& context) override;
// set client to client communication config
void SetClient2ClientConfig(int request_timeout_ms, int connect_timeout_ms,
int max_retry);
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_
#define PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_
#endif // PADDLE_FLUID_DISTRIBUTED_PS_WRAPPER_PS_WRAPPER_H_
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <atomic>
#include <ctime>
#include <map>
#include <memory>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>
#include "paddle/fluid/distributed/ps/service/communicator/communicator_common.h"
#include "paddle/fluid/distributed/ps/service/ps_service/service.h"
#include "paddle/fluid/framework/archive.h"
#include "paddle/fluid/framework/io/fs.h"
#include "paddle/fluid/framework/io/shell.h"
#include "paddle/fluid/framework/program_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/framework/variable_helper.h"
#include "paddle/fluid/platform/macros.h" // for DISABLE_COPY_AND_ASSIGN
namespace paddle {
namespace framework {
class Scope;
class SelectedRows;
class Variable;
} // namespace framework
} // namespace paddle
namespace paddle {
namespace distributed {
class PSCore;
using framework::LoDTensor;
using framework::Scope;
using phi::SelectedRows;
using framework::Variable;
using RpcCtxMap = std::unordered_map<std::string, CommContext>;
struct WrapperContext {
uint32_t table_id;
const std::string path;
const int mode;
const std::string meta;
};
struct InitContext {
const std::vector<int> dev_ids; // for gpu
};
class PSWrapper {
public:
virtual ~PSWrapper() {}
PSWrapper() {}
// init server
virtual int32_t Initialize(InitContext& context) = 0;
virtual void Stop() = 0;
virtual void Load(WrapperContext& context) = 0;
virtual void Save(WrapperContext& context) = 0;
};
} // end namespace distributed
} // end namespace paddle
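A hedged end-to-end sketch of driving a parameter-server wrapper through this abstract interface; FleetWrapper is the concrete implementation shown above, and the path, mode, and table values are placeholders rather than values taken from the patch.

std::shared_ptr<PSWrapper> wrapper = std::make_shared<FleetWrapper>();
InitContext init_ctx;  // dev_ids left empty, e.g. for a CPU run
wrapper->Initialize(init_ctx);
WrapperContext load_ctx{/*table_id=*/0, /*path=*/"/path/to/model",
                        /*mode=*/0, /*meta=*/""};
wrapper->Load(load_ctx);  // with an empty meta this loads table 0 via LoadModelOneTable
wrapper->Stop();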
......@@ -2032,7 +2032,15 @@ static std::string GenerateSingleOpBase(
const char* ATTRS_TEMPLATE = " auto& %s = this->attr_map_;\n";
std::string grad_attrs_str =
paddle::string::Sprintf(ATTRS_TEMPLATE, attrs_name);
if (fwd_op_type == "cast") {
    // switch in_dtype and out_dtype for the cast grad op
const char* CAST_GRAD =
" auto temp_type = %s[\"in_dtype\"];\n"
" %s[\"in_dtype\"] = %s[\"out_dtype\"];\n"
" %s[\"out_dtype\"] = temp_type;\n";
grad_attrs_str += paddle::string::Sprintf(CAST_GRAD, attrs_name, attrs_name,
attrs_name, attrs_name);
}
// Handle dynamic grad attributes
grad_attrs_str += HandleDynamicGradAttributes(fwd_op_type, attrs_name);
generated_grad_function_body += grad_attrs_str;
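For reference, a hedged rendering of what the CAST_GRAD template expands to once Sprintf substitutes attrs_name (assumed here to be "attrs" for readability): the generated grad node simply swaps the forward op's in_dtype and out_dtype attributes.

auto temp_type = attrs["in_dtype"];
attrs["in_dtype"] = attrs["out_dtype"];
attrs["out_dtype"] = temp_type;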
......
......@@ -93,7 +93,7 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
// Create new tensor->impl and fill it with 1.0
if (t.defined()) {
// Fill 1.0
buffer_[slot_id][rank] = paddle::experimental::ones_like(t);
buffer_[slot_id][rank] = paddle::experimental::ones_like(t, t.dtype());
}
}
}
......
......@@ -16,7 +16,9 @@ limitations under the License. */
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
......@@ -24,734 +26,10 @@ namespace operators {
using framework::Tensor;
template <typename T>
void ConstPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left, const int out_d,
const int out_h, const int out_w, const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ConstPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d, const int out_h,
const int out_w, const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
if (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width) {
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = value;
}
} else {
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
}
template <typename T>
void ReflectPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h, const int out_w,
const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d); // reflect by 0
in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = std::max(in_h, -in_h); // reflect by 0
in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = std::max(in_w, -in_w); // reflect by 0
in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ReflectPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d, const int out_h,
const int out_w, const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d);
in_d = std::min(in_d, 2 * in_depth - in_d - 2);
in_h = std::max(in_h, -in_h);
in_h = std::min(in_h, 2 * in_height - in_h - 2);
in_w = std::max(in_w, -in_w);
in_w = std::min(in_w, 2 * in_width - in_w - 2);
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void ReplicatePad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h, const int out_w,
const T value) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ReplicatePad3DFuncNDHWC(const T* in_data, T* out_data, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d,
const int out_h, const int out_w, const T value) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void CircularPad3DFuncNCDHW(const T* in_data, T* out_data, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h, const int out_w,
const T value) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void CircularPad3DFuncNDHWC(const T* in_data, T* out_data, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d,
const int out_h, const int out_w, const T value) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void Pad3DNCDHW(const T* in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height, const int out_width,
const int pad_front, const int pad_top, const int pad_left,
T value, T* out_data,
void (*pad_func)(const T*, T*, const int, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const int,
const int, const T)) {
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(in_data, out_data, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top,
pad_left, out_d, out_h, out_w, value);
}
}
}
in_data += in_depth * in_height * in_width;
out_data += out_depth * out_height * out_width;
}
}
}
template <typename T>
void Pad3DNDHWC(const T* in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height, const int out_width,
const int pad_front, const int pad_top, const int pad_left,
T value, T* out_data,
void (*pad_func)(const T*, T*, const int, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const int,
const int, const int, const T)) {
for (int n = 0; n < num; ++n) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(in_data, out_data, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top,
pad_left, out_d, out_h, out_w, value);
}
}
}
in_data += in_depth * in_height * in_width * channels;
out_data += out_depth * out_height * out_width * channels;
}
}
template <typename T>
void ConstPad3DGradNCDHW(T* d_in_data, const T* d_out_data, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left, const int out_d,
const int out_h, const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)) {
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] =
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
}
template <typename T>
void ConstPad3DGradNDHWC(T* d_in_data, const T* d_out_data, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d, const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)) {
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] = d_out_data[out_index + c];
}
}
}
template <typename T>
void ReflectPad3DGradNCDHW(T* d_in_data, const T* d_out_data,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d, const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d); // reflect by 0
in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = std::max(in_h, -in_h); // reflect by 0
in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = std::max(in_w, -in_w); // reflect by 0
in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void ReflectPad3DGradNDHWC(T* d_in_data, const T* d_out_data,
const int channels, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h, const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d);
in_d = std::min(in_d, 2 * in_depth - in_d - 2);
in_h = std::max(in_h, -in_h);
in_h = std::min(in_h, 2 * in_height - in_h - 2);
in_w = std::max(in_w, -in_w);
in_w = std::min(in_w, 2 * in_width - in_w - 2);
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
template <typename T>
void ReplicatePad3DGradNCDHW(T* d_in_data, const T* d_out_data,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d,
const int out_h, const int out_w) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void ReplicatePad3DGradNDHWC(T* d_in_data, const T* d_out_data,
const int channels, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h,
const int out_w) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
template <typename T>
void CircularPad3DGradNCDHW(T* d_in_data, const T* d_out_data,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const int out_d,
const int out_h, const int out_w) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void CircularPad3DGradNDHWC(T* d_in_data, const T* d_out_data,
const int channels, const int in_depth,
const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const int out_d, const int out_h, const int out_w) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
template <typename T>
void Pad3DGradNCDHW(T* d_in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data,
void (*pad_func)(T*, const T*, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const int,
const int, const int)) {
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(d_in_data, d_out_data, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top,
pad_left, out_d, out_h, out_w);
}
}
}
d_in_data += in_depth * in_height * in_width;
d_out_data += out_depth * out_height * out_width;
}
}
}
template <typename T>
void Pad3DGradNDHWC(T* d_in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height,
const int out_width, const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data,
void (*pad_func)(T*, const T*, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const int,
const int, const int, const int)) {
for (int n = 0; n < num; ++n) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(d_in_data, d_out_data, channels, in_depth, in_height,
in_width, out_depth, out_height, out_width, pad_front,
pad_top, pad_left, out_d, out_h, out_w);
}
}
}
d_in_data += in_depth * in_height * in_width * channels;
d_out_data += out_depth * out_height * out_width * channels;
}
}
static inline std::vector<int> GetPaddings(
const framework::ExecutionContext& context) {
std::vector<int> paddings(6);
auto* paddings_t = context.Input<Tensor>("Paddings");
if (paddings_t) {
auto paddings_data = paddings_t->data<int>();
std::memcpy(paddings.data(), paddings_data, paddings.size() * sizeof(int));
} else {
auto pads = context.Attr<std::vector<int>>("paddings");
std::copy(pads.begin(), pads.end(), paddings.data());
}
return paddings;
}
template <typename T>
class Pad3dCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
std::vector<int> pads = GetPaddings(context);
auto mode = context.Attr<std::string>("mode");
auto data_format = context.Attr<std::string>("data_format");
T value = static_cast<T>(context.Attr<float>("value"));
auto* x = context.Input<Tensor>("X");
auto in_dims = x->dims();
const T* in_data = x->data<T>();
auto* out = context.Output<Tensor>("Out");
if (data_format == "NCDHW") {
out->Resize({in_dims[0], in_dims[1], in_dims[2] + pads[4] + pads[5],
in_dims[3] + pads[2] + pads[3],
in_dims[4] + pads[0] + pads[1]});
} else {
out->Resize({in_dims[0], in_dims[1] + pads[4] + pads[5],
in_dims[2] + pads[2] + pads[3],
in_dims[3] + pads[0] + pads[1], in_dims[4]});
}
auto out_dims = out->dims();
T* out_data = out->mutable_data<T>(context.GetPlace());
int channels = in_dims[1];
int in_depth = in_dims[2];
int in_height = in_dims[3];
int in_width = in_dims[4];
int out_depth = out_dims[2];
int out_height = out_dims[3];
int out_width = out_dims[4];
if (data_format == "NDHWC") {
channels = in_dims[4];
in_depth = in_dims[1];
in_height = in_dims[2];
in_width = in_dims[3];
out_depth = out_dims[1];
out_height = out_dims[2];
out_width = out_dims[3];
}
if (mode == "reflect") {
PADDLE_ENFORCE_GT(in_depth, pads[4],
platform::errors::InvalidArgument(
"The depth of Input(X)'s dimension should be "
"greater than pad_front"
" in reflect mode"
", but received depth(%d) and pad_front(%d).",
in_depth, pads[4]));
PADDLE_ENFORCE_GT(in_depth, pads[5],
platform::errors::InvalidArgument(
"The depth of Input(X)'s dimension should be "
"greater than pad_back"
" in reflect mode"
", but received depth(%d) and pad_back(%d).",
in_depth, pads[5]));
PADDLE_ENFORCE_GT(in_height, pads[2],
platform::errors::InvalidArgument(
"The height of Input(X)'s dimension should be "
"greater than pad_top"
" in reflect mode"
", but received depth(%d) and pad_top(%d).",
in_height, pads[2]));
PADDLE_ENFORCE_GT(in_height, pads[3],
platform::errors::InvalidArgument(
"The height of Input(X)'s dimension should be "
"greater than pad_bottom"
" in reflect mode"
", but received depth(%d) and pad_bottom(%d).",
in_height, pads[3]));
PADDLE_ENFORCE_GT(in_width, pads[0],
platform::errors::InvalidArgument(
"The width of Input(X)'s dimension should be "
"greater than pad_left"
" in reflect mode"
", but received depth(%d) and pad_left(%d).",
in_width, pads[0]));
PADDLE_ENFORCE_GT(in_width, pads[1],
platform::errors::InvalidArgument(
"The width of Input(X)'s dimension should be "
"greater than pad_right"
" in reflect mode"
", but received depth(%d) and pad_right(%d).",
in_width, pads[1]));
} else if (mode == "circular" || mode == "replicate") {
PADDLE_ENFORCE_NE(in_depth * in_height * in_width, 0,
platform::errors::InvalidArgument(
"The input tensor size can not be 0 for circular "
"or replicate padding mode."));
}
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = in_dims[0];
if (data_format == "NCDHW") {
std::map<std::string,
void (*)(const T*, T*, const int, const int, const int,
const int, const int, const int, const int, const int,
const int, const int, const int, const int, const T)>
func_map;
func_map["reflect"] = ReflectPad3DFuncNCDHW;
func_map["replicate"] = ReplicatePad3DFuncNCDHW;
func_map["circular"] = CircularPad3DFuncNCDHW;
func_map["constant"] = ConstPad3DFuncNCDHW;
Pad3DNCDHW(in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
value, out_data, func_map[mode]);
} else {
std::map<std::string, void (*)(const T*, T*, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const int,
const int, const int, const int, const T)>
func_map;
func_map["reflect"] = ReflectPad3DFuncNDHWC;
func_map["replicate"] = ReplicatePad3DFuncNDHWC;
func_map["circular"] = CircularPad3DFuncNDHWC;
func_map["constant"] = ConstPad3DFuncNDHWC;
Pad3DNDHWC(in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
value, out_data, func_map[mode]);
}
}
};
template <typename T>
class Pad3dGradCPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
std::vector<int> pads = GetPaddings(context);
auto mode = context.Attr<std::string>("mode");
auto data_format = context.Attr<std::string>("data_format");
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
auto* d_in = context.Output<Tensor>(framework::GradVarName("X"));
auto d_in_dims = d_in->dims();
auto d_out_dims = d_out->dims();
const T* d_out_data = d_out->data<T>();
T* d_in_data = d_in->mutable_data<T>(context.GetPlace());
phi::funcs::SetConstant<platform::CPUDeviceContext, T> set_zero;
set_zero(context.template device_context<platform::CPUDeviceContext>(),
d_in, static_cast<T>(0));
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = d_in_dims[0];
if (data_format == "NCDHW") {
const int channels = d_in_dims[1];
const int in_depth = d_in_dims[2];
const int in_height = d_in_dims[3];
const int in_width = d_in_dims[4];
const int out_depth = d_out_dims[2];
const int out_height = d_out_dims[3];
const int out_width = d_out_dims[4];
std::map<std::string,
void (*)(T*, const T*, const int, const int, const int,
const int, const int, const int, const int, const int,
const int, const int, const int, const int)>
func_map;
func_map["reflect"] = ReflectPad3DGradNCDHW;
func_map["replicate"] = ReplicatePad3DGradNCDHW;
func_map["circular"] = CircularPad3DGradNCDHW;
func_map["constant"] = ConstPad3DGradNCDHW;
Pad3DGradNCDHW(d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top,
pad_left, d_out_data, func_map[mode]);
} else {
const int channels = d_in_dims[4];
const int in_depth = d_in_dims[1];
const int in_height = d_in_dims[2];
const int in_width = d_in_dims[3];
const int out_depth = d_out_dims[1];
const int out_height = d_out_dims[2];
const int out_width = d_out_dims[3];
std::map<std::string,
void (*)(T*, const T*, const int, const int, const int,
const int, const int, const int, const int, const int,
const int, const int, const int, const int, const int)>
func_map;
func_map["reflect"] = ReflectPad3DGradNDHWC;
func_map["replicate"] = ReplicatePad3DGradNDHWC;
func_map["circular"] = CircularPad3DGradNDHWC;
func_map["constant"] = ConstPad3DGradNDHWC;
Pad3DGradNDHWC(d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top,
pad_left, d_out_data, func_map[mode]);
}
}
};
class Pad3dOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
OP_INOUT_CHECK(ctx->HasInput("X"), "Input", "X", "Pad3d");
OP_INOUT_CHECK(ctx->HasOutput("Out"), "Output", "Out", "Pad3d");
auto x_dim = ctx->GetInputDim("X");
PADDLE_ENFORCE_EQ(x_dim.size(), 5,
platform::errors::InvalidArgument(
"The size of Input(X)'s dimension should be equal to "
"5, but received %d. ",
x_dim.size()));
std::vector<int64_t> out_dims(x_dim.size());
auto data_format = ctx->Attrs().Get<std::string>("data_format");
out_dims[0] = x_dim[0];
if (ctx->HasInput("Paddings")) {
auto paddings_dim = ctx->GetInputDim("Paddings");
PADDLE_ENFORCE_EQ(paddings_dim.size(), 1,
platform::errors::InvalidArgument(
"Size of Input(Paddings)'s dimension should be "
"equal to 1, but received %d.",
paddings_dim.size()));
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_EQ(paddings_dim[0], 6,
platform::errors::InvalidArgument(
"Shape of Input(Paddings) should be equal to "
"[6], but received [%d].",
paddings_dim[0]));
}
out_dims[1] = x_dim[1];
out_dims[2] = x_dim[2];
out_dims[3] = x_dim[3];
} else {
auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
PADDLE_ENFORCE_EQ(
paddings.size(), 6,
platform::errors::InvalidArgument(
"Size of paddings should be equal to 4, but received %d.",
static_cast<int>(paddings.size())));
if (data_format == "NCDHW") {
out_dims[1] = x_dim[1]; // channel
out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0))
? x_dim[2]
: (x_dim[2] + paddings[4] + paddings[5]); // depth
out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0))
? x_dim[3]
: (x_dim[3] + paddings[2] + paddings[3]); // height
out_dims[4] = ((!ctx->IsRuntime()) && (x_dim[4] < 0))
? x_dim[4]
: (x_dim[4] + paddings[0] + paddings[1]); // width
} else { // NDHWC
out_dims[4] = x_dim[4]; // channel
out_dims[1] = ((!ctx->IsRuntime()) && (x_dim[1] < 0))
? x_dim[1]
: (x_dim[1] + paddings[4] + paddings[5]); // depth
out_dims[2] = ((!ctx->IsRuntime()) && (x_dim[2] < 0))
? x_dim[2]
: (x_dim[2] + paddings[2] + paddings[3]); // height
out_dims[3] = ((!ctx->IsRuntime()) && (x_dim[3] < 0))
? x_dim[3]
: (x_dim[3] + paddings[0] + paddings[1]); // width
}
}
ctx->SetOutputDim("Out", phi::make_ddim(out_dims));
ctx->ShareLoD("X", /*->*/ "Out");
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
......@@ -921,15 +199,14 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER(Pad3dOpGradNoNeedBufferVarsInferer, "X");
namespace ops = paddle::operators;
DECLARE_INFER_SHAPE_FUNCTOR(pad3d, Pad3dInferShapeFunctor,
PD_INFER_META(phi::Pad3dInferMeta));
REGISTER_OPERATOR(pad3d, ops::Pad3dOp, ops::Pad3dOpMaker,
ops::Pad3dOpGradMaker<paddle::framework::OpDesc>,
ops::Pad3dOpGradMaker<paddle::imperative::OpBase>,
Pad3dInferShapeFunctor);
REGISTER_OPERATOR(pad3d_grad, ops::Pad3dOpGrad,
ops::Pad3dOpDoubleGradMaker<paddle::framework::OpDesc>,
ops::Pad3dOpDoubleGradMaker<paddle::imperative::OpBase>,
ops::Pad3dOpGradNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(pad3d, ops::Pad3dCPUKernel<float>,
ops::Pad3dCPUKernel<double>, ops::Pad3dCPUKernel<int>,
ops::Pad3dCPUKernel<int64_t>);
REGISTER_OP_CPU_KERNEL(pad3d_grad, ops::Pad3dGradCPUKernel<float>,
ops::Pad3dGradCPUKernel<double>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
using platform::PADDLE_CUDA_NUM_THREADS;
using framework::Tensor;
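// Forward CUDA kernels: one thread per output element. Each kernel decodes
// the flat output index into (n, c, d, h, w) for its layout, shifts by
// (pad_front, pad_top, pad_left) to obtain the source coordinate, and applies
// the mode-specific rule: constant fills out-of-range positions with `value`,
// reflect mirrors about the borders, replicate clamps to the nearest edge,
// and circular wraps around with a modulo.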
template <typename T>
__global__ void Pad3DConstNCDHW(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T value, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
out_data[index] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w];
}
}
template <typename T>
__global__ void Pad3DConstNDHWC(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T value, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
const int in_d = out_d - pad_front;
const int in_h = out_h - pad_top;
const int in_w = out_w - pad_left;
out_data[index] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DReflectNCDHW(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d); // reflect by 0
in_d = min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = max(in_h, -in_h); // reflect by 0
in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = max(in_w, -in_w); // reflect by 0
in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
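    // e.g. with pad_front = 2 and in_depth = 4, out_d = 0 gives
    // in_d = -2, which the two reflections above map to in_d = 2.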
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DReflectNDHWC(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_d = min(in_d, 2 * in_depth - in_d - 2);
in_h = max(in_h, -in_h);
in_h = min(in_h, 2 * in_height - in_h - 2);
in_w = max(in_w, -in_w);
in_w = min(in_w, 2 * in_width - in_w - 2);
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DReplicateNCDHW(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
int in_h = min(in_height - 1, max(out_h - pad_top, 0));
int in_w = min(in_width - 1, max(out_w - pad_left, 0));
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DReplicateNDHWC(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
int in_h = min(in_height - 1, max(out_h - pad_top, 0));
int in_w = min(in_width - 1, max(out_w - pad_left, 0));
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DCircularNCDHW(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DCircularNDHWC(const int nthreads, const T* in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
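// Backward CUDA kernels. For constant padding every interior input element
// receives exactly one output gradient, so those kernels gather with one
// thread per input element. For reflect/replicate/circular padding several
// output elements can map to the same input element, so those kernels run one
// thread per output element and scatter with CudaAtomicAdd.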
template <typename T>
__global__ void Pad3DGradConstNCDHW(const int in_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data) {
CUDA_KERNEL_LOOP(in_index, in_size) {
const int in_w = in_index % in_width;
int nc = in_index / in_width;
const int in_h = nc % in_height;
nc /= in_height;
const int in_d = nc % in_depth;
nc /= in_depth;
const int out_d = in_d + pad_front;
const int out_h = in_h + pad_top;
const int out_w = in_w + pad_left;
d_in_data[in_index] =
d_out_data[nc * out_depth * out_height * out_width +
out_d * out_height * out_width + out_h * out_width + out_w];
}
}
template <typename T>
__global__ void Pad3DGradConstNDHWC(const int in_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data) {
CUDA_KERNEL_LOOP(in_index, in_size) {
const int c = in_index % channels;
int n = in_index / channels;
const int in_w = n % in_width;
n /= in_width;
const int in_h = n % in_height;
n /= in_height;
const int in_d = n % in_depth;
n /= in_depth;
const int out_d = in_d + pad_front;
const int out_h = in_h + pad_top;
const int out_w = in_w + pad_left;
d_in_data[in_index] =
d_out_data[n * out_depth * out_height * out_width * channels +
out_d * out_height * out_width * channels +
out_h * out_width * channels + out_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DGradReflectNCDHW(const int out_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_h = max(in_h, -in_h);
in_w = max(in_w, -in_w);
in_d = min(in_d, 2 * in_depth - in_d - 2);
in_h = min(in_h, 2 * in_height - in_h - 2);
in_w = min(in_w, 2 * in_width - in_w - 2);
platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReflectNDHWC(const int out_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height, const int out_width,
const int pad_front, const int pad_top,
const int pad_left, const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_h = max(in_h, -in_h);
in_w = max(in_w, -in_w);
in_d = min(in_d, in_depth * 2 - in_d - 2);
in_h = min(in_h, in_height * 2 - in_h - 2);
in_w = min(in_w, in_width * 2 - in_w - 2);
platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReplicateNCDHW(
const int out_size, T* d_in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height, const int out_width,
const int pad_front, const int pad_top, const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
const int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReplicateNDHWC(
const int out_size, T* d_in_data, const int num, const int channels,
const int in_depth, const int in_height, const int in_width,
const int out_depth, const int out_height, const int out_width,
const int pad_front, const int pad_top, const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
const int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradCircularNCDHW(const int out_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradCircularNDHWC(const int out_size, T* d_in_data,
const int num, const int channels,
const int in_depth, const int in_height,
const int in_width, const int out_depth,
const int out_height,
const int out_width, const int pad_front,
const int pad_top, const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
static inline std::vector<int> GetPaddings(
const framework::ExecutionContext& context) {
std::vector<int> paddings(6);
auto* paddings_data = context.Input<Tensor>("Paddings");
if (paddings_data) {
Tensor pads;
framework::TensorCopySync(*paddings_data, platform::CPUPlace(), &pads);
auto pads_data = pads.data<int>();
std::memcpy(paddings.data(), pads_data, paddings.size() * sizeof(int));
} else {
auto pads = context.Attr<std::vector<int>>("paddings");
std::copy(pads.begin(), pads.end(), paddings.data());
}
return paddings;
}
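// Paddings can be supplied either through the optional Paddings input tensor
// (synchronously copied to the host above) or through the "paddings"
// attribute. The launch code below only reads pads[0] (left), pads[2] (top)
// and pads[4] (front); the remaining entries are implied by the output shape.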
template <typename T>
class Pad3dCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
std::vector<int> pads = GetPaddings(context);
auto mode = context.Attr<std::string>("mode");
auto data_format = context.Attr<std::string>("data_format");
T value = static_cast<T>(context.Attr<float>("value"));
auto* x = context.Input<Tensor>("X");
auto in_dims = x->dims();
const T* in_data = x->data<T>();
auto* out = context.Output<Tensor>("Out");
auto out_dims = out->dims();
if (data_format == "NCDHW") {
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1];
out_dims[2] = in_dims[2] + pads[4] + pads[5];
out_dims[3] = in_dims[3] + pads[2] + pads[3];
out_dims[4] = in_dims[4] + pads[0] + pads[1];
} else {
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1] + pads[4] + pads[5];
out_dims[2] = in_dims[2] + pads[2] + pads[3];
out_dims[3] = in_dims[3] + pads[0] + pads[1];
out_dims[4] = in_dims[4];
}
T* out_data = out->mutable_data<T>(out_dims, context.GetPlace());
int channels = in_dims[1];
int in_depth = in_dims[2];
int in_height = in_dims[3];
int in_width = in_dims[4];
int out_depth = out_dims[2];
int out_height = out_dims[3];
int out_width = out_dims[4];
if (data_format == "NDHWC") {
channels = in_dims[4];
in_depth = in_dims[1];
in_height = in_dims[2];
in_width = in_dims[3];
out_depth = out_dims[1];
out_height = out_dims[2];
out_width = out_dims[3];
}
if (mode == "reflect") {
PADDLE_ENFORCE_GT(in_depth, pads[4],
platform::errors::InvalidArgument(
"The depth of Input(X)'s dimension should be "
"greater than pad_front"
" in reflect mode"
", but received depth(%d) and pad_front(%d).",
in_depth, pads[4]));
PADDLE_ENFORCE_GT(in_depth, pads[5],
platform::errors::InvalidArgument(
"The depth of Input(X)'s dimension should be "
"greater than pad_back"
" in reflect mode"
", but received depth(%d) and pad_back(%d).",
in_depth, pads[5]));
PADDLE_ENFORCE_GT(in_height, pads[2],
platform::errors::InvalidArgument(
"The height of Input(X)'s dimension should be "
"greater than pad_top"
" in reflect mode"
", but received depth(%d) and pad_top(%d).",
in_height, pads[2]));
PADDLE_ENFORCE_GT(in_height, pads[3],
platform::errors::InvalidArgument(
"The height of Input(X)'s dimension should be "
"greater than pad_bottom"
" in reflect mode"
", but received depth(%d) and pad_bottom(%d).",
in_height, pads[3]));
PADDLE_ENFORCE_GT(in_width, pads[0],
platform::errors::InvalidArgument(
"The width of Input(X)'s dimension should be "
"greater than pad_left"
" in reflect mode"
", but received depth(%d) and pad_left(%d).",
in_width, pads[0]));
PADDLE_ENFORCE_GT(in_width, pads[1],
platform::errors::InvalidArgument(
"The width of Input(X)'s dimension should be "
"greater than pad_right"
" in reflect mode"
", but received depth(%d) and pad_right(%d).",
in_width, pads[1]));
} else if (mode == "circular" || mode == "replicate") {
PADDLE_ENFORCE_NE(in_depth * in_height * in_width, 0,
platform::errors::InvalidArgument(
"The input tensor size can not be 0 for circular "
"or replicate padding mode."));
}
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = in_dims[0];
auto stream = context.cuda_device_context().stream();
int block = PADDLE_CUDA_NUM_THREADS;
const int out_size = out->numel();
int grid = (out_size + block - 1) / block;
if (data_format == "NCDHW") {
if (mode == "reflect") {
Pad3DReflectNCDHW<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else if (mode == "replicate") {
Pad3DReplicateNCDHW<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else if (mode == "circular") {
Pad3DCircularNCDHW<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else {
Pad3DConstNCDHW<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
value, out_data);
}
} else {
if (mode == "reflect") {
Pad3DReflectNDHWC<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else if (mode == "replicate") {
Pad3DReplicateNDHWC<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else if (mode == "circular") {
Pad3DCircularNDHWC<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
out_data);
} else {
Pad3DConstNDHWC<T><<<grid, block, 0, stream>>>(
out_size, in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
value, out_data);
}
}
}
};
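// Gradient kernel: the input-gradient buffer is zero-initialized with
// SetConstant because the reflect/replicate/circular branches only scatter
// into the positions that output elements map to; the constant branch instead
// gathers with one thread per input element (grid recomputed from in_size).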
template <typename T>
class Pad3dGradCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& context) const override {
std::vector<int> pads = GetPaddings(context);
auto mode = context.Attr<std::string>("mode");
auto data_format = context.Attr<std::string>("data_format");
auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
auto* d_in = context.Output<Tensor>(framework::GradVarName("X"));
auto d_in_dims = d_in->dims();
auto d_out_dims = d_out->dims();
const T* d_out_data = d_out->data<T>();
T* d_in_data = d_in->mutable_data<T>(context.GetPlace());
phi::funcs::SetConstant<platform::CUDADeviceContext, T> set_zero;
set_zero(context.template device_context<platform::CUDADeviceContext>(),
d_in, static_cast<T>(0));
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = d_in_dims[0];
auto stream = context.cuda_device_context().stream();
int block = PADDLE_CUDA_NUM_THREADS;
const int out_size = d_out->numel();
const int in_size = d_in->numel();
int grid = (out_size + block - 1) / block;
if (data_format == "NCDHW") {
const int channels = d_in_dims[1];
const int in_depth = d_in_dims[2];
const int in_height = d_in_dims[3];
const int in_width = d_in_dims[4];
const int out_depth = d_out_dims[2];
const int out_height = d_out_dims[3];
const int out_width = d_out_dims[4];
if (mode == "reflect") {
Pad3DGradReflectNCDHW<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else if (mode == "replicate") {
Pad3DGradReplicateNCDHW<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else if (mode == "circular") {
Pad3DGradCircularNCDHW<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else {
grid = (in_size + block - 1) / block;
Pad3DGradConstNCDHW<T><<<grid, block, 0, stream>>>(
in_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
}
} else {
const int channels = d_in_dims[4];
const int in_depth = d_in_dims[1];
const int in_height = d_in_dims[2];
const int in_width = d_in_dims[3];
const int out_depth = d_out_dims[1];
const int out_height = d_out_dims[2];
const int out_width = d_out_dims[3];
if (mode == "reflect") {
Pad3DGradReflectNDHWC<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else if (mode == "replicate") {
Pad3DGradReplicateNDHWC<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else if (mode == "circular") {
Pad3DGradCircularNDHWC<T><<<grid, block, 0, stream>>>(
out_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
} else {
grid = (in_size + block - 1) / block;
Pad3DGradConstNDHWC<T><<<grid, block, 0, stream>>>(
in_size, d_in_data, num, channels, in_depth, in_height, in_width,
out_depth, out_height, out_width, pad_front, pad_top, pad_left,
d_out_data);
}
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_CUDA_KERNEL(pad3d, ops::Pad3dCUDAKernel<plat::float16>,
ops::Pad3dCUDAKernel<float>,
ops::Pad3dCUDAKernel<double>, ops::Pad3dCUDAKernel<int>,
ops::Pad3dCUDAKernel<int64_t>);
REGISTER_OP_CUDA_KERNEL(pad3d_grad, ops::Pad3dGradCUDAKernel<plat::float16>,
ops::Pad3dGradCUDAKernel<float>,
ops::Pad3dGradCUDAKernel<double>);
......@@ -31,6 +31,7 @@ limitations under the License. */
#include "paddle/phi/common/data_type.h"
#include "paddle/phi/core/compat/convert_utils.h"
#include "paddle/phi/core/dense_tensor.h"
namespace paddle {
namespace pybind {
......@@ -62,10 +63,10 @@ int TensorDtype2NumpyDtype(phi::DataType dtype) {
return pybind11::detail::npy_api::NPY_INT32_;
case phi::DataType::INT64:
return pybind11::detail::npy_api::NPY_INT64_;
case phi::DataType::BFLOAT16:
return pybind11::detail::NPY_UINT16_;
case phi::DataType::FLOAT16:
return pybind11::detail::NPY_FLOAT16_;
case phi::DataType::FLOAT32:
return pybind11::detail::npy_api::NPY_FLOAT_;
case phi::DataType::FLOAT64:
......
......@@ -877,6 +877,77 @@ void PadInferMeta(const MetaTensor& input,
out->set_dtype(input.dtype());
}
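// Pad3dInferMeta mirrors Pad3dOp::InferShape: paddings may come from a
// 6-element tensor (checked only at runtime) or from a constant list; at
// compile time unknown extents (< 0) are propagated unchanged.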
void Pad3dInferMeta(const MetaTensor& x,
const ScalarArray& paddings_scalar_array,
const std::string& mode,
float value,
const std::string& data_format,
MetaTensor* out,
MetaConfig config) {
auto x_dim = x.dims();
PADDLE_ENFORCE_EQ(x_dim.size(),
5,
errors::InvalidArgument(
"The size of Input(X)'s dimension should be equal to "
"5, but received %d. ",
x_dim.size()));
std::vector<int64_t> out_dims(x_dim.size());
out_dims[0] = x_dim[0];
if (paddings_scalar_array.FromTensor()) {
if (config.is_runtime) {
PADDLE_ENFORCE_EQ(
paddings_scalar_array.GetData().size(),
6,
errors::InvalidArgument("Shape of Input(Paddings) should be equal to "
"[6], but received [%d].",
paddings_scalar_array.GetData().size()));
}
out_dims[1] = x_dim[1];
out_dims[2] = x_dim[2];
out_dims[3] = x_dim[3];
} else {
auto paddings = paddings_scalar_array.GetData();
PADDLE_ENFORCE_EQ(
paddings.size(),
6,
errors::InvalidArgument(
"Size of paddings should be equal to 6, but received %d.",
static_cast<int>(paddings.size())));
if (data_format == "NCDHW") {
out_dims[1] = x_dim[1]; // channel
out_dims[2] = ((!config.is_runtime) && (x_dim[2] < 0))
? x_dim[2]
: (x_dim[2] + paddings[4] + paddings[5]); // depth
out_dims[3] = ((!config.is_runtime) && (x_dim[3] < 0))
? x_dim[3]
: (x_dim[3] + paddings[2] + paddings[3]); // height
out_dims[4] = ((!config.is_runtime) && (x_dim[4] < 0))
? x_dim[4]
: (x_dim[4] + paddings[0] + paddings[1]); // width
} else { // NDHWC
out_dims[4] = x_dim[4]; // channel
out_dims[1] = ((!config.is_runtime) && (x_dim[1] < 0))
? x_dim[1]
: (x_dim[1] + paddings[4] + paddings[5]); // depth
out_dims[2] = ((!config.is_runtime) && (x_dim[2] < 0))
? x_dim[2]
: (x_dim[2] + paddings[2] + paddings[3]); // height
out_dims[3] = ((!config.is_runtime) && (x_dim[3] < 0))
? x_dim[3]
: (x_dim[3] + paddings[0] + paddings[1]); // width
}
}
out->set_dims(phi::make_ddim(out_dims));
out->set_dtype(x.dtype());
out->share_lod(x);
}
void PixelShuffleInferMeta(const MetaTensor& x,
int upscale_factor,
const std::string& data_format,
......
......@@ -147,6 +147,14 @@ void PadInferMeta(const MetaTensor& input,
MetaTensor* out,
MetaConfig config = MetaConfig());
void Pad3dInferMeta(const MetaTensor& x,
const ScalarArray& paddings,
const std::string& mode,
float value,
const std::string& data_format,
MetaTensor* out,
MetaConfig config = MetaConfig());
void PixelShuffleInferMeta(const MetaTensor& x,
int upscale_factor,
const std::string& data_format,
......
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pad3d_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
template <typename T>
void ConstPad3DGradNCDHW(T* d_in_data,
const T* d_out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)) {
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] =
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
}
template <typename T>
void ConstPad3DGradNDHWC(T* d_in_data,
const T* d_out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
if (!(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)) {
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] = d_out_data[out_index + c];
}
}
}
template <typename T>
void ReflectPad3DGradNCDHW(T* d_in_data,
const T* d_out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d); // reflect by 0
in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = std::max(in_h, -in_h); // reflect by 0
in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = std::max(in_w, -in_w); // reflect by 0
in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void ReflectPad3DGradNDHWC(T* d_in_data,
const T* d_out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d);
in_d = std::min(in_d, 2 * in_depth - in_d - 2);
in_h = std::max(in_h, -in_h);
in_h = std::min(in_h, 2 * in_height - in_h - 2);
in_w = std::max(in_w, -in_w);
in_w = std::min(in_w, 2 * in_width - in_w - 2);
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
template <typename T>
void ReplicatePad3DGradNCDHW(T* d_in_data,
const T* d_out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void ReplicatePad3DGradNDHWC(T* d_in_data,
const T* d_out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
template <typename T>
void CircularPad3DGradNCDHW(T* d_in_data,
const T* d_out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
d_in_data[in_d * in_height * in_width + in_h * in_width + in_w] +=
d_out_data[out_d * out_height * out_width + out_h * out_width + out_w];
}
template <typename T>
void CircularPad3DGradNDHWC(T* d_in_data,
const T* d_out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
d_in_data[in_index + c] += d_out_data[out_index + c];
}
}
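// Loop drivers: iterate over every output element and dispatch to the
// per-mode functor selected from func_map. For NCDHW the data pointers are
// advanced by one (d, h, w) plane per (n, c) pair; for NDHWC they are
// advanced by one whole sample (all channels) per n.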
template <typename T>
void Pad3DGradNCDHW(T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data,
void (*pad_func)(T*,
const T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int)) {
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(d_in_data,
d_out_data,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_d,
out_h,
out_w);
}
}
}
d_in_data += in_depth * in_height * in_width;
d_out_data += out_depth * out_height * out_width;
}
}
}
template <typename T>
void Pad3DGradNDHWC(T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data,
void (*pad_func)(T*,
const T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int)) {
for (int n = 0; n < num; ++n) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(d_in_data,
d_out_data,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_d,
out_h,
out_w);
}
}
}
d_in_data += in_depth * in_height * in_width * channels;
d_out_data += out_depth * out_height * out_width * channels;
}
}
template <typename T, typename Context>
void Pad3dGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* x_grad) {
std::vector<int64_t> pads = paddings.GetData();
auto* d_out = &out_grad;
auto* d_in = x_grad;
auto d_in_dims = d_in->dims();
auto d_out_dims = d_out->dims();
const T* d_out_data = d_out->data<T>();
T* d_in_data = dev_ctx.template Alloc<T>(d_in);
phi::funcs::SetConstant<Context, T>()(dev_ctx, d_in, static_cast<T>(0));
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = d_in_dims[0];
if (data_format == "NCDHW") {
const int channels = d_in_dims[1];
const int in_depth = d_in_dims[2];
const int in_height = d_in_dims[3];
const int in_width = d_in_dims[4];
const int out_depth = d_out_dims[2];
const int out_height = d_out_dims[3];
const int out_width = d_out_dims[4];
std::map<std::string,
void (*)(T*,
const T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int)>
func_map;
func_map["reflect"] = ReflectPad3DGradNCDHW;
func_map["replicate"] = ReplicatePad3DGradNCDHW;
func_map["circular"] = CircularPad3DGradNCDHW;
func_map["constant"] = ConstPad3DGradNCDHW;
Pad3DGradNCDHW(d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data,
func_map[mode]);
} else {
const int channels = d_in_dims[4];
const int in_depth = d_in_dims[1];
const int in_height = d_in_dims[2];
const int in_width = d_in_dims[3];
const int out_depth = d_out_dims[1];
const int out_height = d_out_dims[2];
const int out_width = d_out_dims[3];
std::map<std::string,
void (*)(T*,
const T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int)>
func_map;
func_map["reflect"] = ReflectPad3DGradNDHWC;
func_map["replicate"] = ReplicatePad3DGradNDHWC;
func_map["circular"] = CircularPad3DGradNDHWC;
func_map["constant"] = ConstPad3DGradNDHWC;
Pad3DGradNDHWC(d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data,
func_map[mode]);
}
}
} // namespace phi
PD_REGISTER_KERNEL(
pad3d_grad, CPU, ALL_LAYOUT, phi::Pad3dGradKernel, float, double) {}
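// The phi pad3d_grad CPU kernel is registered for float and double only,
// matching the fluid REGISTER_OP_CPU_KERNEL(pad3d_grad, ...) list above.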
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pad3d_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T>
void ConstPad3DFuncNCDHW(const T* in_data,
T* out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ConstPad3DFuncNDHWC(const T* in_data,
T* out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
if (in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width) {
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = value;
}
} else {
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
}
template <typename T>
void ReflectPad3DFuncNCDHW(const T* in_data,
T* out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d); // reflect by 0
in_d = std::min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = std::max(in_h, -in_h); // reflect by 0
in_h = std::min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = std::max(in_w, -in_w); // reflect by 0
in_w = std::min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ReflectPad3DFuncNDHWC(const T* in_data,
T* out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = std::max(in_d, -in_d);
in_d = std::min(in_d, 2 * in_depth - in_d - 2);
in_h = std::max(in_h, -in_h);
in_h = std::min(in_h, 2 * in_height - in_h - 2);
in_w = std::max(in_w, -in_w);
in_w = std::min(in_w, 2 * in_width - in_w - 2);
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void ReplicatePad3DFuncNCDHW(const T* in_data,
T* out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void ReplicatePad3DFuncNDHWC(const T* in_data,
T* out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = std::min(in_depth - 1, std::max(out_d - pad_front, 0));
int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void CircularPad3DFuncNCDHW(const T* in_data,
T* out_data,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[out_d * out_height * out_width + out_h * out_width + out_w] =
in_data[in_d * in_height * in_width + in_h * in_width + in_w];
}
template <typename T>
void CircularPad3DFuncNDHWC(const T* in_data,
T* out_data,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const int out_d,
const int out_h,
const int out_w,
const T value) {
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
const int out_index =
(out_d * out_height * out_width + out_h * out_width + out_w) * channels;
const int in_index =
(in_d * in_height * in_width + in_h * in_width + in_w) * channels;
for (int c = 0; c < channels; ++c) {
out_data[out_index + c] = in_data[in_index + c];
}
}
template <typename T>
void Pad3DNCDHW(const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T value,
T* out_data,
void (*pad_func)(const T*,
T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const T)) {
for (int n = 0; n < num; ++n) {
for (int c = 0; c < channels; ++c) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(in_data,
out_data,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_d,
out_h,
out_w,
value);
}
}
}
in_data += in_depth * in_height * in_width;
out_data += out_depth * out_height * out_width;
}
}
}
template <typename T>
void Pad3DNDHWC(const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T value,
T* out_data,
void (*pad_func)(const T*,
T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const T)) {
for (int n = 0; n < num; ++n) {
for (int out_d = 0; out_d < out_depth; ++out_d) {
for (int out_h = 0; out_h < out_height; ++out_h) {
for (int out_w = 0; out_w < out_width; ++out_w) {
pad_func(in_data,
out_data,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_d,
out_h,
out_w,
value);
}
}
}
in_data += in_depth * in_height * in_width * channels;
out_data += out_depth * out_height * out_width * channels;
}
}
template <typename T, typename Context>
void Pad3dKernel(const Context& dev_ctx,
const DenseTensor& x,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* out) {
T value = static_cast<T>(pad_value);
std::vector<int64_t> pads = paddings.GetData();
auto in_dims = x.dims();
const T* in_data = x.data<T>();
if (data_format == "NCDHW") {
out->Resize({in_dims[0],
in_dims[1],
in_dims[2] + pads[4] + pads[5],
in_dims[3] + pads[2] + pads[3],
in_dims[4] + pads[0] + pads[1]});
} else {
out->Resize({in_dims[0],
in_dims[1] + pads[4] + pads[5],
in_dims[2] + pads[2] + pads[3],
in_dims[3] + pads[0] + pads[1],
in_dims[4]});
}
auto out_dims = out->dims();
T* out_data = dev_ctx.template Alloc<T>(out);
int channels = in_dims[1];
int in_depth = in_dims[2];
int in_height = in_dims[3];
int in_width = in_dims[4];
int out_depth = out_dims[2];
int out_height = out_dims[3];
int out_width = out_dims[4];
if (data_format == "NDHWC") {
channels = in_dims[4];
in_depth = in_dims[1];
in_height = in_dims[2];
in_width = in_dims[3];
out_depth = out_dims[1];
out_height = out_dims[2];
out_width = out_dims[3];
}
if (mode == "reflect") {
PADDLE_ENFORCE_GT(
in_depth,
pads[4],
errors::InvalidArgument("The depth of Input(X)'s dimension should be "
"greater than pad_front"
" in reflect mode"
", but received depth(%d) and pad_front(%d).",
in_depth,
pads[4]));
PADDLE_ENFORCE_GT(
in_depth,
pads[5],
errors::InvalidArgument("The depth of Input(X)'s dimension should be "
"greater than pad_back"
" in reflect mode"
", but received depth(%d) and pad_back(%d).",
in_depth,
pads[5]));
PADDLE_ENFORCE_GT(
in_height,
pads[2],
errors::InvalidArgument("The height of Input(X)'s dimension should be "
"greater than pad_top"
" in reflect mode"
", but received depth(%d) and pad_top(%d).",
in_height,
pads[2]));
PADDLE_ENFORCE_GT(
in_height,
pads[3],
errors::InvalidArgument("The height of Input(X)'s dimension should be "
"greater than pad_bottom"
" in reflect mode"
", but received depth(%d) and pad_bottom(%d).",
in_height,
pads[3]));
PADDLE_ENFORCE_GT(
in_width,
pads[0],
errors::InvalidArgument("The width of Input(X)'s dimension should be "
"greater than pad_left"
" in reflect mode"
", but received depth(%d) and pad_left(%d).",
in_width,
pads[0]));
PADDLE_ENFORCE_GT(
in_width,
pads[1],
errors::InvalidArgument("The width of Input(X)'s dimension should be "
"greater than pad_right"
" in reflect mode"
", but received depth(%d) and pad_right(%d).",
in_width,
pads[1]));
} else if (mode == "circular" || mode == "replicate") {
PADDLE_ENFORCE_NE(in_depth * in_height * in_width,
0,
errors::InvalidArgument(
"The input tensor size can not be 0 for circular "
"or replicate padding mode."));
}
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = in_dims[0];
if (data_format == "NCDHW") {
std::map<std::string,
void (*)(const T*,
T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const T)>
func_map;
func_map["reflect"] = ReflectPad3DFuncNCDHW;
func_map["replicate"] = ReplicatePad3DFuncNCDHW;
func_map["circular"] = CircularPad3DFuncNCDHW;
func_map["constant"] = ConstPad3DFuncNCDHW;
Pad3DNCDHW(in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
value,
out_data,
func_map[mode]);
} else {
std::map<std::string,
void (*)(const T*,
T*,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const int,
const T)>
func_map;
func_map["reflect"] = ReflectPad3DFuncNDHWC;
func_map["replicate"] = ReplicatePad3DFuncNDHWC;
func_map["circular"] = CircularPad3DFuncNDHWC;
func_map["constant"] = ConstPad3DFuncNDHWC;
Pad3DNDHWC(in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
value,
out_data,
func_map[mode]);
}
}
} // namespace phi
PD_REGISTER_KERNEL(
pad3d, CPU, ALL_LAYOUT, phi::Pad3dKernel, float, double, int, int64_t) {}
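// A minimal Python-side sketch of a call that dispatches to this kernel
// (assuming the paddle.nn.functional.pad API; illustrative only):
//
//   import paddle
//   x = paddle.rand([2, 3, 4, 5, 6])  # NCDHW
//   y = paddle.nn.functional.pad(
//       x, [1, 1, 0, 0, 2, 2], mode='reflect', data_format='NCDHW')
//   # y.shape -> [2, 3, 8, 5, 8]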
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pad3d_grad_kernel.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace phi {
using paddle::platform::PADDLE_CUDA_NUM_THREADS;
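// The phi GPU grad kernels below are ports of the fluid CUDA kernels above:
// the constant branch gathers with one thread per input element, while the
// reflect/replicate/circular branches scatter with CudaAtomicAdd using one
// thread per output element.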
template <typename T>
__global__ void Pad3DGradConstNCDHW(const int in_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(in_index, in_size) {
const int in_w = in_index % in_width;
int nc = in_index / in_width;
const int in_h = nc % in_height;
nc /= in_height;
const int in_d = nc % in_depth;
nc /= in_depth;
const int out_d = in_d + pad_front;
const int out_h = in_h + pad_top;
const int out_w = in_w + pad_left;
d_in_data[in_index] =
d_out_data[nc * out_depth * out_height * out_width +
out_d * out_height * out_width + out_h * out_width + out_w];
}
}
template <typename T>
__global__ void Pad3DGradConstNDHWC(const int in_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(in_index, in_size) {
const int c = in_index % channels;
int n = in_index / channels;
const int in_w = n % in_width;
n /= in_width;
const int in_h = n % in_height;
n /= in_height;
const int in_d = n % in_depth;
n /= in_depth;
const int out_d = in_d + pad_front;
const int out_h = in_h + pad_top;
const int out_w = in_w + pad_left;
d_in_data[in_index] =
d_out_data[n * out_depth * out_height * out_width * channels +
out_d * out_height * out_width * channels +
out_h * out_width * channels + out_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DGradReflectNCDHW(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_h = max(in_h, -in_h);
in_w = max(in_w, -in_w);
in_d = min(in_d, 2 * in_depth - in_d - 2);
in_h = min(in_h, 2 * in_height - in_h - 2);
in_w = min(in_w, 2 * in_width - in_w - 2);
paddle::platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReflectNDHWC(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_h = max(in_h, -in_h);
in_w = max(in_w, -in_w);
in_d = min(in_d, in_depth * 2 - in_d - 2);
in_h = min(in_h, in_height * 2 - in_h - 2);
in_w = min(in_w, in_width * 2 - in_w - 2);
paddle::platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReplicateNCDHW(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
const int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
paddle::platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradReplicateNDHWC(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
const int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
paddle::platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradCircularNCDHW(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
int nc = out_index / out_width;
const int out_w = out_index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
paddle::platform::CudaAtomicAdd(
&d_in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w],
d_out_data[out_index]);
}
}
template <typename T>
__global__ void Pad3DGradCircularNDHWC(const int out_size,
T* d_in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
const T* d_out_data) {
CUDA_KERNEL_LOOP(out_index, out_size) {
const int c = out_index % channels;
int n = out_index / channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
paddle::platform::CudaAtomicAdd(
&d_in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c],
d_out_data[out_index]);
}
}
template <typename T, typename Context>
void Pad3dGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* x_grad) {
std::vector<int64_t> pads = paddings.GetData();
auto* d_out = &out_grad;
auto* d_in = x_grad;
auto d_in_dims = d_in->dims();
auto d_out_dims = d_out->dims();
const T* d_out_data = d_out->data<T>();
T* d_in_data = dev_ctx.template Alloc<T>(d_in);
phi::funcs::SetConstant<Context, T>()(dev_ctx, d_in, static_cast<T>(0));
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = d_in_dims[0];
auto stream = dev_ctx.stream();
int block = PADDLE_CUDA_NUM_THREADS;
const int out_size = d_out->numel();
const int in_size = d_in->numel();
int grid = (out_size + block - 1) / block;
if (data_format == "NCDHW") {
const int channels = d_in_dims[1];
const int in_depth = d_in_dims[2];
const int in_height = d_in_dims[3];
const int in_width = d_in_dims[4];
const int out_depth = d_out_dims[2];
const int out_height = d_out_dims[3];
const int out_width = d_out_dims[4];
if (mode == "reflect") {
Pad3DGradReflectNCDHW<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else if (mode == "replicate") {
Pad3DGradReplicateNCDHW<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else if (mode == "circular") {
Pad3DGradCircularNCDHW<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else {
grid = (in_size + block - 1) / block;
Pad3DGradConstNCDHW<T><<<grid, block, 0, stream>>>(in_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
}
} else {
const int channels = d_in_dims[4];
const int in_depth = d_in_dims[1];
const int in_height = d_in_dims[2];
const int in_width = d_in_dims[3];
const int out_depth = d_out_dims[1];
const int out_height = d_out_dims[2];
const int out_width = d_out_dims[3];
if (mode == "reflect") {
Pad3DGradReflectNDHWC<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else if (mode == "replicate") {
Pad3DGradReplicateNDHWC<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else if (mode == "circular") {
Pad3DGradCircularNDHWC<T><<<grid, block, 0, stream>>>(out_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
} else {
grid = (in_size + block - 1) / block;
Pad3DGradConstNDHWC<T><<<grid, block, 0, stream>>>(in_size,
d_in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
d_out_data);
}
}
}
} // namespace phi
PD_REGISTER_KERNEL(
pad3d_grad, GPU, ALL_LAYOUT, phi::Pad3dGradKernel, float, double) {}
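For the reflect, replicate and circular modes, the backward kernels map every position of the padded gradient back to its source position in the input and accumulate with CudaAtomicAdd, because several padded positions can fold onto the same input element. The per-axis index maps are the same formulas used in the forward kernels; a minimal plain-Python sketch of them (integer arithmetic only, no CUDA) is:

# A minimal sketch of the per-axis index maps used by the grad kernels above,
# in plain Python integer arithmetic. It shows why atomic accumulation is
# needed: several padded positions can map to the same input index.
def map_index(out_idx, pad, size, mode):
    i = out_idx - pad
    if mode == 'reflect':
        i = max(i, -i)                  # reflect across index 0
        i = min(i, 2 * size - i - 2)    # reflect across index size - 1
    elif mode == 'replicate':
        i = min(size - 1, max(i, 0))    # clamp to the nearest border
    elif mode == 'circular':
        i = ((i % size) + size) % size  # wrap around (same formula as the kernel)
    return i

in_depth, pad_front = 3, 2
out_depth = in_depth + 2 * pad_front
for mode in ['reflect', 'replicate', 'circular']:
    print(mode, [map_index(d, pad_front, in_depth, mode) for d in range(out_depth)])
# reflect   [2, 1, 0, 1, 2, 1, 0]
# replicate [0, 0, 0, 1, 2, 2, 2]
# circular  [1, 2, 0, 1, 2, 0, 1]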
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/pad3d_kernel.h"
#include <algorithm>
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/device/gpu/gpu_primitives.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
using paddle::platform::PADDLE_CUDA_NUM_THREADS;
template <typename T>
__global__ void Pad3DConstNCDHW(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T value,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
out_data[index] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[nc * in_depth * in_height * in_width +
in_d * in_height * in_width + in_h * in_width + in_w];
}
}
template <typename T>
__global__ void Pad3DConstNDHWC(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T value,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
const int in_d = out_d - pad_front;
const int in_h = out_h - pad_top;
const int in_w = out_w - pad_left;
out_data[index] =
(in_d < 0 || in_h < 0 || in_w < 0 || in_d >= in_depth ||
in_h >= in_height || in_w >= in_width)
? value
: in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DReflectNCDHW(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d); // reflect by 0
in_d = min(in_d, 2 * in_depth - in_d - 2); // reflect by in_depth
in_h = max(in_h, -in_h); // reflect by 0
in_h = min(in_h, 2 * in_height - in_h - 2); // reflect by in_height
in_w = max(in_w, -in_w); // reflect by 0
in_w = min(in_w, 2 * in_width - in_w - 2); // reflect by in_width
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DReflectNDHWC(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = out_d - pad_front;
int in_h = out_h - pad_top;
int in_w = out_w - pad_left;
in_d = max(in_d, -in_d);
in_d = min(in_d, 2 * in_depth - in_d - 2);
in_h = max(in_h, -in_h);
in_h = min(in_h, 2 * in_height - in_h - 2);
in_w = max(in_w, -in_w);
in_w = min(in_w, 2 * in_width - in_w - 2);
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DReplicateNCDHW(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
int in_h = min(in_height - 1, max(out_h - pad_top, 0));
int in_w = min(in_width - 1, max(out_w - pad_left, 0));
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DReplicateNDHWC(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = min(in_depth - 1, max(out_d - pad_front, 0));
int in_h = min(in_height - 1, max(out_h - pad_top, 0));
int in_w = min(in_width - 1, max(out_w - pad_left, 0));
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T>
__global__ void Pad3DCircularNCDHW(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int nc = index / out_width;
const int out_w = index % out_width;
const int out_h = nc % out_height;
nc /= out_height;
const int out_d = nc % out_depth;
nc /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[index] =
in_data[(nc * in_depth * in_height + in_d * in_height + in_h) *
in_width +
in_w];
}
}
template <typename T>
__global__ void Pad3DCircularNDHWC(const int nthreads,
const T* in_data,
const int num,
const int channels,
const int in_depth,
const int in_height,
const int in_width,
const int out_depth,
const int out_height,
const int out_width,
const int pad_front,
const int pad_top,
const int pad_left,
T* out_data) {
CUDA_KERNEL_LOOP(index, nthreads) {
int n = index / channels;
const int c = index % channels;
const int out_w = n % out_width;
n /= out_width;
const int out_h = n % out_height;
n /= out_height;
const int out_d = n % out_depth;
n /= out_depth;
int in_d = ((out_d - pad_front) % in_depth + in_depth) % in_depth;
int in_h = ((out_h - pad_top) % in_height + in_height) % in_height;
int in_w = ((out_w - pad_left) % in_width + in_width) % in_width;
out_data[index] = in_data[n * in_depth * in_height * in_width * channels +
in_d * in_height * in_width * channels +
in_h * in_width * channels + in_w * channels + c];
}
}
template <typename T, typename Context>
void Pad3dKernel(const Context& dev_ctx,
const DenseTensor& x,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* out) {
std::vector<int64_t> pads = paddings.GetData();
auto in_dims = x.dims();
const T* in_data = x.data<T>();
auto out_dims = out->dims();
T value = static_cast<T>(pad_value);
if (data_format == "NCDHW") {
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1];
out_dims[2] = in_dims[2] + pads[4] + pads[5];
out_dims[3] = in_dims[3] + pads[2] + pads[3];
out_dims[4] = in_dims[4] + pads[0] + pads[1];
} else {
out_dims[0] = in_dims[0];
out_dims[1] = in_dims[1] + pads[4] + pads[5];
out_dims[2] = in_dims[2] + pads[2] + pads[3];
out_dims[3] = in_dims[3] + pads[0] + pads[1];
out_dims[4] = in_dims[4];
}
out->Resize(out_dims);
T* out_data = dev_ctx.template Alloc<T>(out);
int channels = in_dims[1];
int in_depth = in_dims[2];
int in_height = in_dims[3];
int in_width = in_dims[4];
int out_depth = out_dims[2];
int out_height = out_dims[3];
int out_width = out_dims[4];
if (data_format == "NDHWC") {
channels = in_dims[4];
in_depth = in_dims[1];
in_height = in_dims[2];
in_width = in_dims[3];
out_depth = out_dims[1];
out_height = out_dims[2];
out_width = out_dims[3];
}
if (mode == "reflect") {
PADDLE_ENFORCE_GT(
in_depth,
pads[4],
errors::InvalidArgument("The depth of Input(X)'s dimension should be "
"greater than pad_front"
" in reflect mode"
", but received depth(%d) and pad_front(%d).",
in_depth,
pads[4]));
PADDLE_ENFORCE_GT(
in_depth,
pads[5],
errors::InvalidArgument("The depth of Input(X)'s dimension should be "
"greater than pad_back"
" in reflect mode"
", but received depth(%d) and pad_back(%d).",
in_depth,
pads[5]));
PADDLE_ENFORCE_GT(
in_height,
pads[2],
errors::InvalidArgument("The height of Input(X)'s dimension should be "
"greater than pad_top"
" in reflect mode"
", but received depth(%d) and pad_top(%d).",
in_height,
pads[2]));
PADDLE_ENFORCE_GT(
in_height,
pads[3],
errors::InvalidArgument("The height of Input(X)'s dimension should be "
"greater than pad_bottom"
" in reflect mode"
", but received depth(%d) and pad_bottom(%d).",
in_height,
pads[3]));
PADDLE_ENFORCE_GT(
in_width,
pads[0],
errors::InvalidArgument("The width of Input(X)'s dimension should be "
"greater than pad_left"
" in reflect mode"
", but received depth(%d) and pad_left(%d).",
in_width,
pads[0]));
PADDLE_ENFORCE_GT(
in_width,
pads[1],
errors::InvalidArgument("The width of Input(X)'s dimension should be "
"greater than pad_right"
" in reflect mode"
", but received depth(%d) and pad_right(%d).",
in_width,
pads[1]));
} else if (mode == "circular" || mode == "replicate") {
PADDLE_ENFORCE_NE(in_depth * in_height * in_width,
0,
errors::InvalidArgument(
"The input tensor size can not be 0 for circular "
"or replicate padding mode."));
}
const int pad_left = pads[0];
const int pad_top = pads[2];
const int pad_front = pads[4];
const int num = in_dims[0];
auto stream = dev_ctx.stream();
int block = PADDLE_CUDA_NUM_THREADS;
const int out_size = out->numel();
int grid = (out_size + block - 1) / block;
if (data_format == "NCDHW") {
if (mode == "reflect") {
Pad3DReflectNCDHW<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else if (mode == "replicate") {
Pad3DReplicateNCDHW<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else if (mode == "circular") {
Pad3DCircularNCDHW<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else {
Pad3DConstNCDHW<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
value,
out_data);
}
} else {
if (mode == "reflect") {
Pad3DReflectNDHWC<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else if (mode == "replicate") {
Pad3DReplicateNDHWC<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else if (mode == "circular") {
Pad3DCircularNDHWC<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
out_data);
} else {
Pad3DConstNDHWC<T><<<grid, block, 0, stream>>>(out_size,
in_data,
num,
channels,
in_depth,
in_height,
in_width,
out_depth,
out_height,
out_width,
pad_front,
pad_top,
pad_left,
value,
out_data);
}
}
}
} // namespace phi
PD_REGISTER_KERNEL(pad3d,
GPU,
ALL_LAYOUT,
phi::Pad3dKernel,
phi::dtype::float16,
float,
double,
int,
int64_t) {}
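Each forward kernel launches one thread per output element (grid = (out_size + block - 1) / block) and decodes the flat index back into spatial coordinates with successive div/mod operations. The NCDHW decode, mirrored in plain Python for clarity (illustrative only), is:

# Plain-Python mirror of the NCDHW index decode used by the kernels above.
def decode_ncdhw(index, out_depth, out_height, out_width):
    out_w = index % out_width
    nc = index // out_width
    out_h = nc % out_height
    nc //= out_height
    out_d = nc % out_depth
    nc //= out_depth            # what remains is the fused n * channels + c index
    return nc, out_d, out_h, out_w

# For an output of shape [1, 2, 3, 4, 5] (N, C, D, H, W), flat index 37 maps to:
print(decode_ncdhw(37, 3, 4, 5))  # (0, 1, 3, 2): nc=0, d=1, h=3, w=2

block = 512                             # stand-in for PADDLE_CUDA_NUM_THREADS
out_size = 1 * 2 * 3 * 4 * 5
grid = (out_size + block - 1) // block  # one thread per output element
print(grid)  # 1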
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/common/scalar_array.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Pad3dGradKernel(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& out_grad,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* x_grad);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/common/scalar_array.h"
#include "paddle/phi/core/dense_tensor.h"
namespace phi {
template <typename T, typename Context>
void Pad3dKernel(const Context& dev_ctx,
const DenseTensor& x,
const ScalarArray& paddings,
const std::string& mode,
float pad_value,
const std::string& data_format,
DenseTensor* out);
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature Pad3dOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.HasInput("Paddings")) {
return KernelSignature(
"pad3d", {"X"}, {"Paddings", "mode", "value", "data_format"}, {"Out"});
}
return KernelSignature(
"pad3d", {"X"}, {"paddings", "mode", "value", "data_format"}, {"Out"});
}
KernelSignature Pad3dGradOpArgumentMapping(const ArgumentMappingContext& ctx) {
if (ctx.HasInput("Paddings")) {
return KernelSignature("pad3d_grad",
{"X", GradVarName("Out")},
{"Paddings", "mode", "value", "data_format"},
{GradVarName("X")});
}
return KernelSignature("pad3d_grad",
{"X", GradVarName("Out")},
{"paddings", "mode", "value", "data_format"},
{GradVarName("X")});
}
} // namespace phi
PD_REGISTER_ARG_MAPPING_FN(pad3d, phi::Pad3dOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(pad3d_grad, phi::Pad3dGradOpArgumentMapping);
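The two signatures above differ only in where the paddings come from: the "Paddings" input (a tensor supplied at runtime) versus the "paddings" attribute (a compile-time list). At the Python level this roughly corresponds to passing the pad argument as a Tensor or as a plain list; the sketch below assumes paddle.nn.functional.pad accepts both forms and lowers to these two signatures:

# Illustrative sketch of the two paths selected by the argument mapping above:
# paddings given as a plain list (attribute) vs. as a Tensor (runtime input).
import paddle
import paddle.nn.functional as F

x = paddle.ones([1, 1, 2, 2, 2])
pad_list = [1, 1, 1, 1, 1, 1]
pad_tensor = paddle.to_tensor(pad_list, dtype='int32')

print(F.pad(x, pad_list, mode='constant', data_format='NCDHW').shape)    # [1, 1, 4, 4, 4]
print(F.pad(x, pad_tensor, mode='constant', data_format='NCDHW').shape)  # [1, 1, 4, 4, 4]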
......@@ -612,7 +612,7 @@ def grad(outputs,
if no_grad_vars is None:
no_grad_vars = []
elif isinstance(no_grad_vars, core.VarBase):
elif isinstance(no_grad_vars, (core.VarBase, core.eager.Tensor)):
no_grad_vars = [no_grad_vars]
elif isinstance(no_grad_vars, core.eager.Tensor):
no_grad_vars = [no_grad_vars]
......@@ -718,13 +718,13 @@ def to_variable(value, name=None, zero_copy=None, dtype=None):
y.shape # [3L, 2L]
"""
support_type = (list, tuple, np.ndarray, core.VarBase, framework.Variable,
core.Tensor, core.LoDTensor)
support_type = (list, tuple, np.ndarray, core.eager.Tensor, core.VarBase,
framework.Variable, core.Tensor, core.LoDTensor)
if not isinstance(value, support_type):
raise TypeError(
"The type of 'value' in fluid.dygraph.to_variable must be %s, but received %s."
% (support_type, type(value)))
if isinstance(value, (core.VarBase, framework.Variable)):
if isinstance(value, (core.eager.Tensor, core.VarBase, framework.Variable)):
return value
elif isinstance(value, (core.Tensor, core.LoDTensor)):
return core.VarBase(value)
......
......@@ -28,6 +28,7 @@ from .math_op_patch import monkey_patch_math_varbase
from .parallel import scale_loss
from paddle.fluid.data_feeder import convert_dtype, _PADDLE_DTYPE_2_NUMPY_DTYPE
import paddle.utils.deprecated as deprecated
from paddle import _C_ops
class TensorHookRemoveHelper(object):
......@@ -782,7 +783,7 @@ def monkey_patch_varbase():
@framework.dygraph_only
def clone(self):
return _C_ops_.assign(self)
return _C_ops.assign(self)
@framework.dygraph_only
def value(self):
......
......@@ -316,7 +316,8 @@ def _dygraph_not_support_(func):
def _dygraph_only_(func):
def __impl__(*args, **kwargs):
assert in_dygraph_mode(
assert (
in_dygraph_mode() or _in_eager_mode()
), "We only support '%s()' in dynamic graph mode, please call 'paddle.disable_static()' to enter dynamic graph mode." % func.__name__
return func(*args, **kwargs)
......
......@@ -886,6 +886,7 @@ class TestDistributeFpnProposals(LayerTest):
refer_level=4,
refer_scale=224,
rois_num=rois_num_dy)
print(type(multi_rois_dy))
output_dy = multi_rois_dy + [restore_ind_dy] + rois_num_per_level_dy
output_dy_np = []
for output in output_dy:
......@@ -973,4 +974,5 @@ class TestBoxDecoderAndAssign(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -50,3 +50,7 @@ class TestExponentialFamilyException(unittest.TestCase):
def test_entropy_exception(self):
with self.assertRaises(NotImplementedError):
paddle.distribution.ExponentialFamily.entropy(self.dist)
if __name__ == '__main__':
unittest.main()
......@@ -112,3 +112,7 @@ class TestKLExpfamilyExpFamily(unittest.TestCase):
kl._kl_expfamily_expfamily(self.p, self.q),
rtol=config.RTOL.get(config.DEFAULT_DTYPE),
atol=config.ATOL.get(config.DEFAULT_DTYPE))
if __name__ == '__main__':
unittest.main()
......@@ -20,6 +20,7 @@ import sys
sys.path.append("../")
from op_test import OpTest
import paddle
from paddle import fluid
......@@ -115,4 +116,5 @@ class TestSequenceConcatOpError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -39,6 +39,7 @@ class TensorTypeTest(unittest.TestCase):
tensorx = paddle.tensor.logic.Tensor(inx)
typex_str = str(type(tensorx))
expectx = "<class 'paddle.Tensor'>"
self.assertEqual((typex_str == expectx), True)
......
......@@ -1202,4 +1202,5 @@ class TestMultiTensorAdam(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -451,4 +451,5 @@ class TestLayerTo(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -18,6 +18,7 @@ import numpy as np
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
from paddle.fluid import ParamAttr, initializer
import paddle
class TestCreateParameterError(unittest.TestCase):
......@@ -50,4 +51,5 @@ class TestCreateParameterError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -20,6 +20,7 @@ import numpy as np
from op_test import OpTest
from test_softmax_op import stable_softmax
import paddle.fluid as fluid
import paddle
def CTCAlign(input, lod, blank, merge_repeated, padding=0, input_length=None):
......@@ -229,4 +230,5 @@ class BadInputTestCTCAlignr(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -211,4 +211,5 @@ class TestDiffOpPreAppendAxis(TestDiffOp):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -190,4 +190,5 @@ class TestDygraphRemoveWeightNorm(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -209,4 +209,5 @@ class TestExponentialAPI(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -189,3 +189,8 @@ class TestElementwiseFmin2Op(OpTest):
"""test_check_grad_ingore_y"""
self.check_grad(
['X'], 'Out', max_relative_error=0.005, no_grad_set=set('Y'))
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -1025,4 +1025,5 @@ class TestDiracInitializer3(TestDiracInitializer1):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -163,4 +163,5 @@ class TestMultiplyError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -88,4 +88,5 @@ class TestWhenTrainWithNoGrad(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -210,6 +210,9 @@ class TestIscloseOpFloat64(TestIscloseOp):
self.atol = np.array([0]).astype("float64")
self.equal_nan = False
def test_check_output(self):
self.check_output()
class TestIscloseOpLargeDimInput(TestIscloseOp):
def set_args(self):
......@@ -222,4 +225,5 @@ class TestIscloseOpLargeDimInput(TestIscloseOp):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -175,4 +175,5 @@ class TestNNFunctionalLogSoftmaxAPI(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -555,4 +555,5 @@ class TestLRScheduler(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -19,6 +19,7 @@ import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid as fluid
import paddle
def compute_mean_iou(predictions, labels, num_classes, in_wrongs, in_corrects,
......@@ -129,4 +130,5 @@ class TestMeanIOUOpError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -17,6 +17,7 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle
class TestMinusOp(OpTest):
......@@ -36,4 +37,5 @@ class TestMinusOp(OpTest):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -588,4 +588,5 @@ class API_NormTest(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -315,7 +315,9 @@ class TestSaveLoadAny(unittest.TestCase):
paddle.save(tensor, path)
t_dygraph = paddle.load(path)
np_dygraph = paddle.load(path, return_numpy=True)
self.assertTrue(isinstance(t_dygraph, paddle.fluid.core.VarBase))
self.assertTrue(
isinstance(t_dygraph, (paddle.fluid.core.VarBase,
paddle.fluid.core.eager.Tensor)))
self.assertTrue(np.array_equal(tensor.numpy(), np_dygraph))
self.assertTrue(np.array_equal(tensor.numpy(), t_dygraph.numpy()))
paddle.enable_static()
......@@ -685,27 +687,34 @@ class TestSaveLoadAny(unittest.TestCase):
np.array(v), np.array(load_tensor2['k2'][k])))
self.assertTrue(load_tensor2['epoch'] == 123)
self.assertTrue(isinstance(load_tensor3[0], fluid.core.VarBase))
self.assertTrue(
isinstance(load_tensor3[0], (fluid.core.VarBase,
fluid.core.eager.Tensor)))
self.assertTrue(np.array_equal(load_tensor3[0].numpy(), obj3[0]))
self.assertTrue(isinstance(load_tensor3[1], fluid.core.VarBase))
self.assertTrue(
isinstance(load_tensor3[1], (fluid.core.VarBase,
fluid.core.eager.Tensor)))
self.assertTrue(np.array_equal(load_tensor3[1].numpy(), obj3[1]))
for k, v in state_dict.items():
self.assertTrue(
isinstance(load_tensor3[2]["state_dict"][k],
fluid.core.VarBase))
isinstance(load_tensor3[2]["state_dict"][k], (
fluid.core.VarBase, fluid.core.eager.Tensor)))
self.assertTrue(
np.array_equal(load_tensor3[2]["state_dict"][k].numpy(),
np.array(v)))
for k, v in state_dict.items():
self.assertTrue(
isinstance(load_tensor3[2]["opt"][k], fluid.core.VarBase))
isinstance(load_tensor3[2]["opt"][k], (
fluid.core.VarBase, fluid.core.eager.Tensor)))
self.assertTrue(
np.array_equal(load_tensor3[2]["opt"][k].numpy(),
np.array(v)))
self.assertTrue(isinstance(load_tensor4[0], fluid.core.VarBase))
self.assertTrue(
isinstance(load_tensor4[0], (fluid.core.VarBase,
fluid.core.eager.Tensor)))
self.assertTrue(np.array_equal(load_tensor4[0].numpy(), obj4[0]))
load_array1 = paddle.load(path1, return_numpy=True)
......
......@@ -54,7 +54,7 @@ class TestRenormAPI(unittest.TestCase):
def test_dygraph_api(self):
self.input_data()
# case axis none
with fluid.dygraph.guard():
with fluid.dygraph.guard(fluid.CPUPlace()):
input = [[[2.0, 2, -2], [3, 0.3, 3]], [[2, -8, 2], [3.1, 3.7, 3]]]
x = paddle.to_tensor(input, stop_gradient=False)
y = paddle.renorm(x, 1.0, 2, 2.05)
......@@ -94,4 +94,5 @@ class TestRenormAPI(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -23,6 +23,7 @@ from test_multiclass_nms_op import iou
from test_multiclass_nms_op import nms
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
import paddle
def multiclass_nms(prediction, class_num, keep_top_k, nms_threshold):
......@@ -518,4 +519,5 @@ class TestRetinanetDetectionOutOpError(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -178,4 +178,5 @@ class SmoothL1Loss(unittest.TestCase):
if __name__ == "__main__":
paddle.enable_static()
unittest.main()
......@@ -22,7 +22,7 @@ import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
# Situation 1: repeat_times is a list (without tensor)
#Situation 1: repeat_times is a list (without tensor)
class TestTileOpRank1(OpTest):
def setUp(self):
self.op_type = "tile"
......
......@@ -1361,4 +1361,5 @@ class TestVarBaseCopyGradientFrom(unittest.TestCase):
if __name__ == '__main__':
paddle.enable_static()
unittest.main()
......@@ -68,8 +68,9 @@ def to_list(value):
def to_numpy(var):
assert isinstance(var, (Variable, fluid.core.VarBase)), "not a variable"
if isinstance(var, fluid.core.VarBase):
assert isinstance(var, (Variable, fluid.core.VarBase,
fluid.core.eager.Tensor)), "not a variable"
if isinstance(var, (fluid.core.VarBase, fluid.core.eager.Tensor)):
return var.numpy()
t = global_scope().find_var(var.name).get_tensor()
return np.array(t)
......
......@@ -282,7 +282,7 @@ class Accuracy(Metric):
Return:
Tensor: the accuracy of current step.
"""
if isinstance(correct, paddle.Tensor):
if isinstance(correct, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
correct = correct.numpy()
num_samples = np.prod(np.array(correct.shape[:-1]))
accs = []
......@@ -410,12 +410,12 @@ class Precision(Metric):
the shape should keep the same as preds.
The data type is 'int32' or 'int64'.
"""
if isinstance(preds, paddle.Tensor):
if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
preds = preds.numpy()
elif not _is_numpy_(preds):
raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")
if isinstance(labels, paddle.Tensor):
if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
labels = labels.numpy()
elif not _is_numpy_(labels):
raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")
......@@ -543,12 +543,12 @@ class Recall(Metric):
the shape should keep the same as preds.
Shape: [batch_size, 1], Dtype: 'int32' or 'int64'.
"""
if isinstance(preds, paddle.Tensor):
if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
preds = preds.numpy()
elif not _is_numpy_(preds):
raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")
if isinstance(labels, paddle.Tensor):
if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
labels = labels.numpy()
elif not _is_numpy_(labels):
raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")
......@@ -698,12 +698,12 @@ class Auc(Metric):
(batch_size, 1), labels[i] is either o or 1,
representing the label of the instance i.
"""
if isinstance(labels, paddle.Tensor):
if isinstance(labels, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
labels = labels.numpy()
elif not _is_numpy_(labels):
raise ValueError("The 'labels' must be a numpy ndarray or Tensor.")
if isinstance(preds, paddle.Tensor):
if isinstance(preds, (paddle.Tensor, paddle.fluid.core.eager.Tensor)):
preds = preds.numpy()
elif not _is_numpy_(preds):
raise ValueError("The 'preds' must be a numpy ndarray or Tensor.")
......
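The metric changes above all follow the same pattern: broaden the isinstance checks so that both the legacy VarBase-backed Tensor and the eager-mode Tensor are converted to NumPy before statistics are computed. A minimal sketch of that normalization, assuming the same Paddle build as this diff (where paddle.fluid.core.eager.Tensor is importable), and with an illustrative helper name that is not part of the Paddle API:

# Minimal sketch of the normalization pattern used by the metric updates above.
# The tuple mirrors the broadened isinstance checks in the diff.
import numpy as np
import paddle

_TENSOR_TYPES = (paddle.Tensor, paddle.fluid.core.eager.Tensor)

def to_ndarray(value):
    if isinstance(value, _TENSOR_TYPES):
        return value.numpy()
    if isinstance(value, np.ndarray):
        return value
    raise ValueError("The value must be a numpy ndarray or Tensor.")

print(to_ndarray(paddle.to_tensor([1.0, 2.0])))  # [1. 2.]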
......@@ -462,7 +462,7 @@ def is_tensor(x):
print(check) #False
"""
return isinstance(x, Tensor)
return isinstance(x, (Tensor, paddle.fluid.core.eager.Tensor))
def _bitwise_op(op_name, x, y, out=None, name=None, binary_op=True):
......
......@@ -264,6 +264,9 @@ def fill_diagonal_tensor(x, y, offset=0, dim1=0, dim2=1, name=None):
setattr(core.VarBase, 'fill_diagonal_tensor', fill_diagonal_tensor)
if core._in_eager_mode():
setattr(core.eager.Tensor, 'fill_diagonal_tensor', fill_diagonal_tensor)
@dygraph_only
def tolist(x):
......@@ -890,12 +893,20 @@ def stack(x, axis=0, name=None):
x1 = paddle.to_tensor([[1.0, 2.0]])
x2 = paddle.to_tensor([[3.0, 4.0]])
x3 = paddle.to_tensor([[5.0, 6.0]])
out = paddle.stack([x1, x2, x3], axis=0)
print(out.shape) # [3, 1, 2]
print(out)
# [[[1., 2.]],
# [[3., 4.]],
# [[5., 6.]]]
out = paddle.stack([x1, x2, x3], axis=-2)
print(out.shape) # [1, 3, 2]
print(out)
# [[[1., 2.],
# [3., 4.],
# [5., 6.]]]
"""
return layers.stack(x, axis, name)
......
......@@ -1337,7 +1337,7 @@ def renorm(x, p, axis, max_norm):
raise ValueError("the axis:{} should not be less than -1 * length of input_shape:{}".format(axis,-1 * len(input_shape)))
axis = axis + len(input_shape)
if paddle.in_dynamic_mode():
out = core.ops.renorm(x, 'p',p, 'axis',axis, 'max_norm', max_norm)
out = _C_ops.renorm(x, 'p',p, 'axis',axis, 'max_norm', max_norm)
return out
inputs = {'X': x}
......
......@@ -327,12 +327,17 @@ class ToTensor(BaseTransform):
import paddle.vision.transforms as T
import paddle.vision.transforms.functional as F
fake_img = Image.fromarray((np.random.rand(224, 224, 3) * 255.).astype(np.uint8))
fake_img = Image.fromarray((np.random.rand(4, 5, 3) * 255.).astype(np.uint8))
transform = T.ToTensor()
tensor = transform(fake_img)
print(tensor.shape)
# [3, 4, 5]
print(tensor.dtype)
# paddle.float32
"""
def __init__(self, data_format='CHW', keys=None):
......