Commit fef6f6f9 authored by: S seiriosPlus

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into optimize/large_scale_kv_spped

......@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
#include <cmath>
#include <functional>
#include <string>
#include <vector>
......@@ -74,12 +75,17 @@ void recompute_bias_and_weights(const Scope* scope, ir::Node* conv_weight,
auto* weights = scope->FindVar(conv_weight->Name())->GetMutable<LoDTensor>();
auto weights_shape = weights->dims();
auto weights_shape_2d = flatten_to_2d(weights_shape, 1);
auto* weights_data = weights->mutable_data<float>(platform::CPUPlace());
EigenMatrixArrayMap weights_array_2d(
weights->mutable_data<float>(platform::CPUPlace()), weights_shape_2d[0],
EigenMatrixArrayMap weights_array_2d(weights_data, weights_shape_2d[0],
weights_shape_2d[1]);
weights_array_2d.colwise() *= scale_array;
  // Check for subnormal values that slow down convolution execution
for (int i = 0; i < weights->numel(); ++i) {
if (std::fpclassify(weights_data[i]) == FP_SUBNORMAL) weights_data[i] = 0;
}
}
void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
......@@ -108,13 +114,6 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
GET_CONV_BN_NODES(conv_ac_pattern);
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *affine_channel);
if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+affinechannel fuse";
return;
}
// Create eltwise_y (conv bias) variable
VarDesc eltwise_y_in_desc(
patterns::PDNodeName(name_scope_, "eltwise_y_in"));
......@@ -143,6 +142,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
desc.SetOutput("Out", std::vector<std::string>({ac_out->Name()}));
desc.SetType("elementwise_add");
desc.SetAttr("axis", 1);
desc.SetAttr("use_mkldnn", conv->Op()->GetAttrIfExists<bool>("use_mkldnn"));
auto eltwise_op = g->CreateOpNode(&desc); // OpDesc will be copied.
GraphSafeRemoveNodes(graph, {ac_scale, ac_bias, affine_channel});
......
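The loop added above zeros any subnormal weight after the per-channel scale multiplication, since subnormal operands can slow convolution kernels considerably. A minimal standalone sketch of the same check, independent of Paddle (buffer and length are arbitrary):

#include <cmath>
#include <cstddef>

// Flush subnormal (denormal) floats to zero in place, mirroring the
// std::fpclassify(x) == FP_SUBNORMAL check used in the fuse pass above.
void FlushSubnormalsToZero(float* data, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    if (std::fpclassify(data[i]) == FP_SUBNORMAL) data[i] = 0.0f;
  }
}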
......@@ -15,7 +15,6 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
......@@ -103,8 +102,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// params_file_ fields.
CP_MEMBER(opt_cache_dir_);
prog_file_ = std::move(other.prog_file_);
params_file_ = std::move(other.params_file_);
CP_MEMBER(prog_file_);
CP_MEMBER(params_file_);
CP_MEMBER(use_fc_padding_);
// GPU related.
......
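Note on the change above: the copy constructor now copies prog_file_ and params_file_ with CP_MEMBER instead of moving them out of the (const) source object. CP_MEMBER itself is not shown in this excerpt; it is presumably a plain member-copy macro along these lines (an assumption, for illustration only):

// Hypothetical definition matching how CP_MEMBER is used above:
// copy the named member from `other` into this object.
#define CP_MEMBER(member__) member__ = other.member__;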
......@@ -32,7 +32,6 @@
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/memory/memcpy.h"
......@@ -517,6 +516,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
if (config.glog_info_disabled()) {
FLAGS_logtostderr = 1;
FLAGS_minloglevel = 2; // GLOG_ERROR
......@@ -1058,3 +1059,122 @@ USE_TRT_CONVERTER(skip_layernorm);
USE_TRT_CONVERTER(slice);
USE_TRT_CONVERTER(scale);
#endif
namespace paddle_infer {
void Tensor::Reshape(const std::vector<int> &shape) { tensor_->Reshape(shape); }
std::vector<int> Tensor::shape() const { return tensor_->shape(); }
void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
return tensor_->SetLoD(x);
}
std::vector<std::vector<size_t>> Tensor::lod() const { return tensor_->lod(); }
const std::string &Tensor::name() const { return tensor_->name(); }
DataType Tensor::type() const { return tensor_->type(); }
Predictor::Predictor(const Config &config) {
const_cast<Config *>(&config)->SwitchUseFeedFetchOps(false);
// The second parameter indicates that the discard log is not printed
predictor_ = paddle::CreatePaddlePredictor<
Config, paddle::PaddleEngineKind::kAnalysis>(config);
}
std::vector<std::string> Predictor::GetInputNames() {
return predictor_->GetInputNames();
}
std::unique_ptr<Tensor> Predictor::GetInputHandle(const std::string &name) {
auto zero_copy_tensor = predictor_->GetInputTensor(name);
std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
return tensor;
}
std::vector<std::string> Predictor::GetOutputNames() {
return predictor_->GetOutputNames();
}
std::unique_ptr<Tensor> Predictor::GetOutputHandle(const std::string &name) {
auto zero_copy_tensor = predictor_->GetOutputTensor(name);
std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
return tensor;
}
bool Predictor::Run() { return predictor_->ZeroCopyRun(); }
std::unique_ptr<Predictor> Predictor::Clone() {
auto analysis_pred = predictor_->Clone();
std::unique_ptr<Predictor> pred(new Predictor(std::move(analysis_pred)));
return pred;
}
void Predictor::ClearIntermediateTensor() {
predictor_->ClearIntermediateTensor();
}
int GetNumBytesOfDataType(DataType dtype) {
switch (dtype) {
case DataType::FLOAT32:
return sizeof(float);
case DataType::INT64:
return sizeof(int64_t);
case DataType::INT32:
return sizeof(int32_t);
case DataType::UINT8:
return sizeof(uint8_t);
default:
assert(false);
return -1;
}
}
std::string GetVersion() { return paddle::get_version(); }
std::string UpdateDllFlag(const char *name, const char *value) {
return paddle::UpdateDllFlag(name, value);
}
} // namespace paddle_infer
namespace paddle_infer {
std::shared_ptr<Predictor> CreatePredictor(const Config &config) { // NOLINT
std::shared_ptr<Predictor> predictor(new Predictor(config));
return predictor;
}
namespace services {
PredictorPool::PredictorPool(const Config &config, size_t size) {
PADDLE_ENFORCE_GE(
size, 1UL,
paddle::platform::errors::InvalidArgument(
"The predictor pool size should be greater than 1, but it's (%d)",
size));
Config copy_config(config);
main_pred_.reset(new Predictor(config));
for (size_t i = 0; i < size - 1; i++) {
if (config.tensorrt_engine_enabled()) {
Config config_tmp(copy_config);
preds_.push_back(
std::move(std::unique_ptr<Predictor>(new Predictor(config_tmp))));
} else {
preds_.push_back(std::move(main_pred_->Clone()));
}
}
}
Predictor *PredictorPool::Retrive(size_t idx) {
PADDLE_ENFORCE_LT(
idx, preds_.size() + 1,
paddle::platform::errors::InvalidArgument(
"There are (%d) predictors in the pool, but the idx is (%d)", idx,
preds_.size() + 1));
if (idx == 0) {
return main_pred_.get();
}
return preds_[idx - 1].get();
}
} // namespace services
} // namespace paddle_infer
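For reference, a minimal usage sketch of the paddle_infer wrapper implemented above, modeled on the tests added later in this commit; the header path, model paths, and input shape are placeholders:

#include <functional>
#include <numeric>
#include <vector>
#include "paddle_inference_api.h"  // header declaring paddle_infer (path is an assumption)

void RunOnce() {
  paddle_infer::Config config;
  config.SetModel("/path/to/model", "/path/to/params");
  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed an all-ones tensor through the first input handle.
  auto input_t = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  std::vector<float> input(1 * 3 * 318 * 318, 1.0f);
  input_t->Reshape({1, 3, 318, 318});
  input_t->CopyFromCpu(input.data());

  predictor->Run();

  // Copy the first output back to host memory.
  auto output_t = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  auto out_shape = output_t->shape();
  int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                std::multiplies<int>());
  std::vector<float> out(out_num);
  output_t->CopyToCpu(out.data());
  // out_num * GetNumBytesOfDataType(output_t->type()) gives the size in bytes.
}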
......@@ -112,6 +112,12 @@ void PaddleBuf::Free() {
}
}
NativeConfig::NativeConfig() {
LOG(WARNING) << "The paddle::NativeConfig interface is going to be "
"deprecated in the next release, plase use the latest "
"paddle_infer::Config instead.";
}
std::string get_version() {
std::stringstream ss;
ss << "version: " << framework::paddle_version() << "\n";
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include <glog/logging.h>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
......@@ -25,6 +26,7 @@ limitations under the License. */
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -311,6 +313,8 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
// 1. GPU memory
......
......@@ -347,6 +347,7 @@ class PD_INFER_DECL PaddlePredictor {
/// place of inference, etc.)
///
struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
NativeConfig();
/// GPU related fields.
bool use_gpu{false};
int device{0};
......@@ -421,7 +422,8 @@ enum class PaddleEngineKind {
};
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
const ConfigT& config);
template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
......@@ -437,6 +439,4 @@ PD_INFER_DECL std::string get_version();
PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
PD_INFER_DECL std::shared_ptr<framework::Cipher> MakeCipher(
const std::string& config_file);
} // namespace paddle
......@@ -22,9 +22,124 @@ limitations under the License. */
#pragma once
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle_analysis_config.h" // NOLINT
#include "paddle_api.h" // NOLINT
namespace paddle_infer {
using DataType = paddle::PaddleDType;
using PlaceType = paddle::PaddlePlace;
using PrecisionType = paddle::AnalysisConfig::Precision;
using Config = paddle::AnalysisConfig;
class PD_INFER_DECL Tensor {
public:
  // Can only be created by predictor->GetInputHandle(const std::string& name)
  // or predictor->GetOutputHandle(const std::string& name)
Tensor() = delete;
explicit Tensor(std::unique_ptr<paddle::ZeroCopyTensor>&& tensor)
: tensor_(std::move(tensor)) {}
void Reshape(const std::vector<int>& shape);
template <typename T>
void CopyFromCpu(const T* data);
// should add the place
template <typename T>
T* mutable_data(PlaceType place);
template <typename T>
void CopyToCpu(T* data);
template <typename T>
T* data(PlaceType* place, int* size) const;
void SetLoD(const std::vector<std::vector<size_t>>& x);
std::vector<std::vector<size_t>> lod() const;
DataType type() const;
std::vector<int> shape() const;
const std::string& name() const;
private:
std::unique_ptr<paddle::ZeroCopyTensor> tensor_;
};
class PD_INFER_DECL Predictor {
public:
Predictor() = default;
~Predictor() {}
// Use for clone
explicit Predictor(std::unique_ptr<paddle::PaddlePredictor>&& pred)
: predictor_(std::move(pred)) {}
explicit Predictor(const Config& config);
std::vector<std::string> GetInputNames();
std::unique_ptr<Tensor> GetInputHandle(const std::string& name);
bool Run();
std::vector<std::string> GetOutputNames();
std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
std::unique_ptr<Predictor> Clone();
void ClearIntermediateTensor();
private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
const Config& config); // NOLINT
PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
PD_INFER_DECL std::string GetVersion();
PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
template <typename T>
void Tensor::CopyFromCpu(const T* data) {
tensor_->copy_from_cpu<T>(data);
}
template <typename T>
void Tensor::CopyToCpu(T* data) {
return tensor_->copy_to_cpu<T>(data);
}
template <typename T>
T* Tensor::mutable_data(PlaceType place) {
return tensor_->mutable_data<T>(place);
}
template <typename T>
T* Tensor::data(PlaceType* place, int* size) const {
return tensor_->data<T>(place, size);
}
} // namespace paddle_infer
namespace paddle_infer {
namespace services {
class PD_INFER_DECL PredictorPool {
public:
PredictorPool() = delete;
PredictorPool(const PredictorPool&) = delete;
PredictorPool& operator=(const PredictorPool&) = delete;
explicit PredictorPool(const Config& config, size_t size = 1);
Predictor* Retrive(size_t idx);
private:
std::shared_ptr<Predictor> main_pred_;
std::vector<std::unique_ptr<Predictor>> preds_;
};
} // namespace services
} // namespace paddle_infer
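A short sketch of the PredictorPool declared above, following the test added later in this commit; the pool size, index, and model path are arbitrary:

#include "paddle_inference_api.h"  // header declaring paddle_infer (path is an assumption)

void UsePool() {
  paddle_infer::Config config;
  config.SetModel("/path/to/model_dir");
  // Pool of 4 predictors built from one config; per the implementation above,
  // index 0 returns the main predictor and the rest are clones (or fresh
  // predictors when the TensorRT engine is enabled).
  paddle_infer::services::PredictorPool pool(config, 4);
  paddle_infer::Predictor* pred = pool.Retrive(2);
  auto in_names = pred->GetInputNames();
  // Feed inputs via pred->GetInputHandle(in_names[0]), then call pred->Run().
}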
......@@ -188,6 +188,8 @@ void CpuPassStrategy::EnableMKLDNN() {
"depthwise_conv_mkldnn_pass", //
"conv_bn_fuse_pass", // Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass", // preserve correct pass order
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
"conv_transpose_bn_fuse_pass", //
"conv_transpose_eltwiseadd_bn_fuse_pass", //
"conv_bias_mkldnn_fuse_pass", //
......
......@@ -54,7 +54,7 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT {
auto ptr = new SkipLayerNormPluginDynamic(
bias_.data(), scale_.data(), bias_size_, scale_size_, eps_, ban_fp16_);
ptr->bias_gpu_ = bias_gpu_;
ptr->scale_gpu_ = bias_gpu_;
ptr->scale_gpu_ = scale_gpu_;
return ptr;
}
......
......@@ -515,3 +515,9 @@ if(WITH_MKLDNN)
inference_analysis_test(test_analyzer_capi_ner SRCS analyzer_capi_ner_tester.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model)
if(WITH_GPU)
inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc
EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
ARGS --infer_model=${RESNET50_MODEL_DIR})
endif()
......@@ -72,3 +72,59 @@ TEST(AnalysisPredictor, use_gpu) {
} // namespace inference
} // namespace paddle
namespace paddle_infer {
TEST(Predictor, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "model";
Config config;
config.EnableUseGpu(100, 0);
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableLiteEngine(PrecisionType::kFloat32);
auto predictor = CreatePredictor(config);
const int batch = 1;
const int channel = 3;
const int height = 318;
const int width = 318;
const int input_num = batch * channel * height * width;
std::vector<float> input(input_num, 1);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape({1, 3, 318, 318});
input_t->CopyFromCpu(input.data());
predictor->Run();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
size_t out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
std::vector<float> out_data;
out_data.resize(out_num);
output_t->CopyToCpu(out_data.data());
const std::vector<float> truth_values = {
127.780396f, 738.16656f, 1013.2264f, -438.17206f, 366.4022f,
927.66187f, 736.2241f, -633.68567f, -329.92737f, -430.15637f,
-633.0639f, -146.54858f, -1324.2804f, -1349.3661f, -242.67671f,
117.44864f, -801.7251f, -391.51495f, -404.8202f, 454.16132f,
515.48206f, -133.03114f, 69.293076f, 590.09753f, -1434.6917f,
-1070.8903f, 307.0744f, 400.52573f, -316.12177f, -587.1265f,
-161.05742f, 800.3663f, -96.47157f, 748.708f, 868.17645f,
-447.9403f, 112.73656f, 1127.1992f, 47.43518f, 677.7219f,
593.1881f, -336.4011f, 551.3634f, 397.82474f, 78.39835f,
-715.4006f, 405.96988f, 404.25684f, 246.01978f, -8.430191f,
131.36617f, -648.0528f};
float* data_o = out_data.data();
for (size_t j = 0; j < out_num; j += 10) {
EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
10e-5);
}
}
} // namespace paddle_infer
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda_runtime.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstring>
#include <numeric>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace paddle_infer {
TEST(Predictor, use_gpu) {
LOG(INFO) << GetVersion();
UpdateDllFlag("conv_workspace_size_limit", "4000");
std::string model_dir = FLAGS_infer_model + "/model";
Config config;
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
auto pred_clone = predictor->Clone();
std::vector<int> in_shape = {1, 3, 318, 318};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
predictor->Run();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
std::vector<float> out_data;
out_data.resize(out_num);
output_t->CopyToCpu(out_data.data());
predictor->ClearIntermediateTensor();
}
TEST(PredictorPool, basic) {
LOG(INFO) << GetVersion();
UpdateDllFlag("conv_workspace_size_limit", "4000");
std::string model_dir = FLAGS_infer_model + "/model";
Config config;
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableUseGpu(100, 0);
services::PredictorPool pred_pool(config, 4);
auto pred = pred_pool.Retrive(2);
std::vector<int> in_shape = {1, 3, 318, 318};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
auto in_names = pred->GetInputNames();
auto input_t = pred->GetInputHandle(in_names[0]);
input_t->name();
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
pred->Run();
auto out_names = pred->GetOutputNames();
auto output_t = pred->GetOutputHandle(out_names[0]);
auto out_type = output_t->type();
LOG(INFO) << GetNumBytesOfDataType(out_type);
if (out_type == DataType::FLOAT32) {
PlaceType place;
int size;
output_t->data<float>(&place, &size);
}
}
} // namespace paddle_infer
......@@ -41,7 +41,7 @@ TEST(AnalysisPredictor, use_gpu) {
SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
std::vector<PaddleTensor> outputs;
for (auto& input : inputs_all) {
for (auto &input : inputs_all) {
ASSERT_TRUE(predictor->Run(input, &outputs));
predictor->ClearIntermediateTensor();
}
......@@ -49,3 +49,27 @@ TEST(AnalysisPredictor, use_gpu) {
} // namespace inference
} // namespace paddle
namespace paddle_infer {
TEST(PredictorPool, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
Config config;
config.EnableUseGpu(100, 0);
config.SetModel(model_dir);
config.EnableTensorRtEngine();
services::PredictorPool pred_pool(config, 1);
auto predictor = pred_pool.Retrive(0);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
std::vector<int> in_shape = {1, 3, 224, 224};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
predictor->Run();
}
} // namespace paddle_infer
......@@ -20,6 +20,7 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
......@@ -1231,3 +1232,24 @@ REGISTER_OP_CPU_KERNEL(
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
ops::AbsGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu)
.AddCheckpoint(
R"ROC(fix leaky_relu, bahavior changed when alpha < 0 or alpha > 1)ROC",
paddle::framework::compatible::OpVersionDesc()
.BugfixWithBehaviorChanged(
"leaky_relu calculate formula before checkponit: out = max(x, "
"alpha * x); after checkpoint: out = x if x > 0 else alpha * "
"x"));
REGISTER_OP_VERSION(hard_shrink)
.AddCheckpoint(
R"ROC(fix hard_shrink, bahavior changed when threshold<0)ROC",
paddle::framework::compatible::OpVersionDesc()
.BugfixWithBehaviorChanged(
"hard_shrink calculate formula before checkponit: out = x * "
"((x < -threshold) + (x > threshold)); after checkpoint: out = "
"x * (((x < -threshold) + (x > threshold)) > 0)"));
/* ========================================================================== */
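To make the leaky_relu change above concrete: the two formulas agree for 0 <= alpha <= 1 and diverge otherwise, e.g. x = 1, alpha = 2 yields 2 under the old formula and 1 under the fixed one. A small sketch, independent of Paddle:

#include <algorithm>

// Old behavior recorded in the checkpoint: out = max(x, alpha * x).
float LeakyReluOld(float x, float alpha) { return std::max(x, alpha * x); }

// Fixed behavior: out = x if x > 0 else alpha * x.
float LeakyReluNew(float x, float alpha) { return x > 0 ? x : alpha * x; }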
......@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#ifdef PADDLE_WITH_MKLDNN
......@@ -567,3 +568,14 @@ REGISTER_OP_CPU_KERNEL(
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
double>);
REGISTER_OP_VERSION(conv_transpose)
.AddCheckpoint(
R"ROC(
      Upgrade conv_transpose, add a new attribute [output_padding].
)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"output_padding",
"In order to add additional size to one side of each dimension "
"in the output",
{}));
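For context on the new output_padding attribute: in the usual transposed-convolution size relation (general background, not taken from this diff), the extra size is added to one side of each spatial dimension:

    out = (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + 1 + output_padding

For example, in = 4, stride = 2, padding = 0, dilation = 1, kernel = 3, output_padding = 1 gives out = (4 - 1) * 2 + (3 - 1) + 1 + 1 = 10.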
......@@ -56,7 +56,7 @@ endif()
cc_test(rpc_server_test SRCS rpc_server_test.cc
DEPS ${RPC_DEPS} executor scope proto_desc lookup_sparse_table_read_op)
DEPS ${RPC_DEPS} executor scope proto_desc lookup_sparse_table_read_op scale_op)
cc_test(varhandle_test SRCS varhandle_test.cc DEPS profiler scope)
cc_library(parameter_prefetch SRCS parameter_prefetch.cc DEPS sendrecvop_rpc memory)
cc_library(parameter_send SRCS parameter_send.cc DEPS sendrecvop_rpc memory)
......
......@@ -132,6 +132,15 @@ void ProcGetResponse(const VarHandle& var_h,
&trainer_id);
}
void ProcGetRecvResponse(const VarHandle& var_h,
const ::grpc::ByteBuffer& ret_msg) {
VLOG(4) << "ProcGetRecvResponse";
framework::Variable* outvar = nullptr;
int trainer_id;
DeserializeRecvFromByteBuffer(ret_msg, *var_h.ctx(), var_h.scope(), &outvar,
&trainer_id);
}
template <typename T>
void RequestToByteBuffer(const T& proto, ::grpc::ByteBuffer* result) {
::grpc::Slice slice(proto.ByteSizeLong());
......@@ -482,6 +491,79 @@ VarHandlePtr GRPCClient::AsyncDistributeNotify(
return h;
}
VarHandlePtr GRPCClient::AsyncSendAndRecv(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& send_var_name,
const std::string& recv_var_name,
const std::string& table_name,
int64_t time_out) {
const platform::DeviceContext* p_ctx = &ctx;
const std::string ep_val = ep;
const std::string send_var_name_val = send_var_name;
const std::string recv_var_name_val = recv_var_name;
const std::string table_name_val = table_name;
const framework::Scope* p_scope = &scope;
const auto ch = GetChannel(ep_val);
const std::string method = kSendAndRecvRPC;
VLOG(4) << "GRPCClient::SendAndRecv Begin ,Send_var_name: "
<< send_var_name_val << " Recv_var_name: " << recv_var_name_val;
int retry_times_ = 0;
while (true) {
SendAndRecvProcessor* s = new SendAndRecvProcessor(ch);
VarHandlePtr h(
new VarHandle(ep, method, send_var_name_val, p_ctx, p_scope));
VarHandlePtr h_recv(
new VarHandle(ep, method, recv_var_name_val, p_ctx, p_scope));
s->Prepare(h, time_out);
s->RecvPrepare(h_recv);
framework::AsyncIO([send_var_name_val, recv_var_name_val, table_name_val,
p_scope, p_ctx, s, method, h, this] {
auto* send_var = p_scope->FindVar(send_var_name_val);
send_var->GetMutable<framework::LoDTensor>()->set_lod({});
::grpc::ByteBuffer buf;
VLOG(4) << "SerializeToByteBuffer: send_var_name_val: "
<< send_var_name_val
<< " recv_var_name_val: " << recv_var_name_val;
SerializeToByteBuffer(send_var_name_val, send_var, *p_ctx, &buf,
recv_var_name_val, trainer_id_, table_name_val);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetRecvResponse;
platform::RecordRPCEvent record_event(method);
auto call = s->stub_g_.PrepareUnaryCall(
s->context_.get(), "/sendrecv.SendRecvService/SendAndRecvVariable",
buf, &cq_);
call->StartCall();
call->Finish(&s->reply_, &s->status_, reinterpret_cast<void*>(s));
if (UNLIKELY(platform::IsProfileEnabled())) {
h->Wait();
}
});
req_count_++;
if (FLAGS_rpc_retry_times > 0 && retry_times_ < FLAGS_rpc_retry_times) {
h->Wait();
if (h->should_retry) {
VLOG(3) << "rpc call failed, retry times " << retry_times_;
retry_times_++;
std::random_device rd;
std::this_thread::sleep_for(std::chrono::milliseconds(rd() % 5));
continue;
}
}
return h;
}
}
bool GRPCClient::Wait() {
std::unique_lock<std::mutex> lk(sync_mutex_);
sync_cond_.wait(lk, [this] { return (req_count_ == 0 || ok_ == false); });
......
......@@ -53,6 +53,8 @@ namespace distributed {
void ProcGetResponse(const VarHandle& var_h, const grpc::ByteBuffer& msg);
void ProcGetRecvResponse(const VarHandle& var_h, const grpc::ByteBuffer& msg);
class BaseProcessor {
public:
BaseProcessor() { context_ = nullptr; }
......@@ -131,6 +133,28 @@ class GetProcessor : public BaseProcessor {
RequestGetCallBack response_call_back_ = ProcGetResponse;
};
class SendAndRecvProcessor : public BaseProcessor {
public:
explicit SendAndRecvProcessor(std::shared_ptr<grpc::Channel> ch)
: BaseProcessor(), stub_g_(ch) {}
virtual ~SendAndRecvProcessor() {}
void ProcessImpl() override {
if (response_call_back_) {
response_call_back_(*var_h_recv_.get(), reply_);
var_h_recv_->Finish(true);
}
}
void RecvPrepare(VarHandlePtr h_recv) { var_h_recv_ = h_recv; }
::grpc::ByteBuffer reply_;
::grpc::GenericStub stub_g_;
RequestGetCallBack response_call_back_ = ProcGetResponse;
VarHandlePtr var_h_recv_;
};
class BatchBarrierProcessor : public BaseProcessor {
public:
explicit BatchBarrierProcessor(std::shared_ptr<grpc::Channel> ch)
......@@ -231,6 +255,14 @@ class GRPCClient : public RPCClient {
const framework::Scope& scope, const std::string& var_name,
int64_t time_out = FLAGS_rpc_deadline) override;
VarHandlePtr AsyncSendAndRecv(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& send_var_name,
const std::string& recv_var_name,
const std::string& table_name = "",
int64_t time_out = FLAGS_rpc_deadline) override;
VarHandlePtr AsyncSendComplete(
const std::string& ep, int64_t time_out = FLAGS_rpc_deadline) override;
......
......@@ -76,7 +76,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
PADDLE_THROW("Serialize does not support type: %s",
typeid(var->Type()).name());
}
std::string header;
request.AppendToString(&header);
auto buffer = std::unique_ptr<char[]>(new char[1024]);
......@@ -101,7 +100,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
}
#endif
PADDLE_ENFORCE_NOT_NULL(payload);
e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber,
payload->memory_size());
if (payload->memory_size() >= std::numeric_limits<int>::max()) {
......@@ -140,7 +138,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
::grpc::Slice::STEAL_REF);
num_slices = 4;
}
::grpc::ByteBuffer tmp(&slices[0], num_slices);
msg->Swap(&tmp);
}
......@@ -156,6 +153,19 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
*trainer_id = resp.GetTrainerId();
}
void DeserializeRecvFromByteBuffer(const ::grpc::ByteBuffer& msg,
const platform::DeviceContext& ctx,
const framework::Scope* scope,
framework::Variable** var, int* trainer_id) {
platform::RecordRPCEvent record_event("deserial");
operators::distributed::GRPCVariableResponse resp(scope, &ctx);
PADDLE_ENFORCE_EQ(
resp.Parse(msg), 0,
platform::errors::InvalidArgument("parse bytebuffer to tensor error!"));
*var = resp.GetRecvVar();
*trainer_id = resp.GetTrainerId();
}
} // namespace distributed
} // namespace operators
} // namespace paddle
......@@ -47,6 +47,11 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
const framework::Scope* scope,
framework::Variable** var, int* trainer_id);
void DeserializeRecvFromByteBuffer(const ::grpc::ByteBuffer& msg,
const platform::DeviceContext& ctx,
const framework::Scope* scope,
framework::Variable** var, int* trainer_id);
} // namespace distributed
} // namespace operators
} // namespace paddle
......@@ -28,6 +28,7 @@ DECLARE_int32(rpc_retry_bind_port);
namespace paddle {
namespace operators {
namespace distributed {
enum CallStatus { PROCESS = 0, FINISH };
// reference:
......@@ -433,6 +434,51 @@ class RequestNotify final : public RequestBase {
ServerAsyncResponseWriter<sendrecv::VoidMessage> responder_;
};
class RequestSendAndRecv final : public RequestBase {
public:
explicit RequestSendAndRecv(GrpcService::AsyncService* service,
::grpc::ServerCompletionQueue* cq,
RequestHandler* request_handler, int req_id)
: RequestBase(service, cq, request_handler, req_id), responder_(&ctx_) {
request_.reset(new GRPCVariableResponse(
request_handler->scope(), request_handler->dev_ctx(),
request_handler->distributed_mode()));
int method_id =
static_cast<int>(distributed::GrpcMethod::kRequestSendAndRecv);
service_->RequestAsyncUnary(
method_id, &ctx_, request_.get(), &responder_, cq_, cq_,
reinterpret_cast<void*>(static_cast<intptr_t>(req_id)));
}
virtual ~RequestSendAndRecv() {}
std::string GetReqName() override { return request_->Varname(); }
void Process() override {
std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname();
std::string table_name = request_->TableName();
int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestSendAndRecv, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name << " trainer: " << trainer_id;
auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name);
framework::Variable* outvar = nullptr;
request_handler_->Handle(in_var_name, scope, invar, &outvar, trainer_id,
out_var_name, table_name);
SerializeToByteBuffer(out_var_name, outvar, *request_handler_->dev_ctx(),
&reply_);
Finish(reply_, &responder_);
}
protected:
std::shared_ptr<GRPCVariableResponse> request_;
::grpc::ByteBuffer reply_;
ServerAsyncResponseWriter<::grpc::ByteBuffer> responder_;
};
void AsyncGRPCServer::WaitServerReady() {
VLOG(4) << "AsyncGRPCServer is waiting server ready";
std::unique_lock<std::mutex> lock(this->mutex_ready_);
......@@ -586,6 +632,8 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
b = new RequestCheckpointNotify(service_.get(), cq.get(), handler, req_id);
} else if (rpc_name == kRequestNotify) {
b = new RequestNotify(service_.get(), cq.get(), handler, req_id);
} else if (rpc_name == kRequestSendAndRecv) {
b = new RequestSendAndRecv(service_.get(), cq.get(), handler, req_id);
} else {
PADDLE_ENFORCE(false, "not supported rpc");
}
......
......@@ -85,10 +85,12 @@ enum class GrpcMethod {
kGetMonomerVariable,
kGetMonomerBarrier,
kRequestNotify,
kRequestSendAndRecv,
// when you add new handler, change kGrpcNumMethods at the same time!
};
static const int kGrpcNumMethods =
static_cast<int>(GrpcMethod::kRequestNotify) + 1;
static_cast<int>(GrpcMethod::kRequestSendAndRecv) + 1;
inline const char* GrpcMethodName(GrpcMethod id) {
switch (id) {
......@@ -108,6 +110,8 @@ inline const char* GrpcMethodName(GrpcMethod id) {
return "/sendrecv.SendRecvService/CheckpointNotify";
case GrpcMethod::kRequestNotify:
return "/sendrecv.SendRecvService/DistributeNotify";
case GrpcMethod::kRequestSendAndRecv:
return "/sendrecv.SendRecvService/SendAndRecvVariable";
}
// Shouldn't be reached.
......
......@@ -46,6 +46,7 @@ constexpr char kRequestCheckpoint[] = "RequestCheckpoint";
constexpr char kRequestPassBarrier[] = "RequestPassBarrier";
constexpr char kRequestGetNoBarrier[] = "GetVariableNoBarrier";
constexpr char kRequestNotify[] = "RequestNotify";
constexpr char kRequestSendAndRecv[] = "RequestSendAndRecv";
constexpr char kSendRPC[] = "SendRPC";
constexpr char kGetRPC[] = "GetRPC";
......@@ -57,6 +58,7 @@ constexpr char kFetchBarrierRPC[] = "FetchBarrierRPC";
constexpr char kSendMonomerFetchBarrierRPC[] = "SendMonomerFetchBarrierRPC";
constexpr char kSendCompleteRPC[] = "SendCompleteRPC";
constexpr char kCheckPointNotifyRPC[] = "CheckPointNotifyRPC";
constexpr char kSendAndRecvRPC[] = "SendAndRecvRPC";
constexpr int64_t kPrefetchTimeout = 60000;
#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
......
......@@ -325,6 +325,22 @@ bool RequestNotifyHandler::Handle(const std::string &varname,
return true;
}
bool RequestSendAndRecvHandler::Handle(const std::string &varname,
framework::Scope *Scope,
framework::Variable *var,
framework::Variable **outvar,
const int trainer_id,
const std::string &out_var_name,
const std::string &table_name) {
VLOG(3) << "SendAndRecvHandle: " << varname
<< " out_var_name: " << out_var_name
<< " , trainer_id: " << trainer_id;
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), Scope);
*outvar = Scope->FindVar(out_var_name);
return true;
}
} // namespace distributed
} // namespace operators
} // namespace paddle
......@@ -176,6 +176,17 @@ class RequestNotifyHandler final : public RequestHandler {
std::unordered_map<int, int64_t> decay_counters;
};
class RequestSendAndRecvHandler final : public RequestHandler {
public:
explicit RequestSendAndRecvHandler(int distributed_mode)
: RequestHandler(distributed_mode) {}
virtual ~RequestSendAndRecvHandler() {}
bool Handle(const std::string& varname, framework::Scope* Scope,
framework::Variable* var, framework::Variable** outvar,
const int trainer_id, const std::string& out_var_name = "",
const std::string& table_name = "") override;
};
} // namespace distributed
} // namespace operators
} // namespace paddle
......@@ -85,6 +85,12 @@ class RPCClient {
const framework::Scope& scope, const std::string& var_name,
int64_t time_out = FLAGS_rpc_deadline) = 0;
virtual VarHandlePtr AsyncSendAndRecv(
const std::string& ep, const platform::DeviceContext& ctx,
const framework::Scope& scope, const std::string& send_var_name,
const std::string& recv_var_name, const std::string& table_name = "",
int64_t time_out = FLAGS_rpc_deadline) = 0;
virtual VarHandlePtr AsyncSendComplete(
const std::string& ep, int64_t time_out = FLAGS_rpc_deadline) = 0;
......
......@@ -35,27 +35,24 @@ namespace platform = paddle::platform;
namespace distributed = paddle::operators::distributed;
USE_NO_KERNEL_OP(lookup_sparse_table_read);
USE_OP(scale);
std::unique_ptr<distributed::RPCServer> g_rpc_service;
std::unique_ptr<distributed::RequestHandler> g_req_handler;
framework::BlockDesc* AppendPrefetchBlcok(framework::ProgramDesc* program) {
framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {
auto root_block = program->MutableBlock(0);
auto* block = program->AppendBlock(*root_block);
framework::VariableNameMap input({{"W", {"w"}}, {"Ids", {"ids"}}});
framework::VariableNameMap output({{"Output", {"out"}}});
auto op = block->AppendOp();
op->SetType("lookup_sparse_table_read");
op->SetInput("W", {"w"});
op->SetInput("Ids", {"ids"});
op->SetOutput("Out", {"out"});
op->SetAttr("tablename", {"w"});
op->SetAttr("value_names", {"Param"});
auto& out = *root_block->Var("out");
framework::OpDesc* op = block->AppendOp();
op->SetType("scale");
op->SetInput("X", {"x"});
op->SetOutput("Out", {"res"});
op->SetAttr("scale", 0.5f);
auto& out = *root_block->Var("res");
out.SetType(framework::proto::VarType::LOD_TENSOR);
out.SetShape({10, 10});
out.SetShape({1, 10});
return block;
}
......@@ -69,6 +66,12 @@ void CreateVarsOnScope(framework::Scope* scope, platform::CPUPlace* place) {
auto ids_var = scope->Var("ids");
ids_var->GetMutable<framework::LoDTensor>();
auto x_var = scope->Var("x");
x_var->GetMutable<framework::LoDTensor>();
auto res_var = scope->Var("res");
res_var->GetMutable<framework::LoDTensor>();
}
void InitTensorsOnClient(framework::Scope* scope, platform::CPUPlace* place,
......@@ -78,6 +81,11 @@ void InitTensorsOnClient(framework::Scope* scope, platform::CPUPlace* place,
int64_t* ids_ptr =
ids_var->mutable_data<int64_t>(framework::DDim({rows_numel, 1}), *place);
for (int64_t i = 0; i < rows_numel; ++i) ids_ptr[i] = i * 2;
auto x_var = scope->Var("x")->GetMutable<framework::LoDTensor>();
float* x_ptr =
x_var->mutable_data<float>(framework::DDim({1, rows_numel}), *place);
for (int64_t i = 0; i < rows_numel; ++i) x_ptr[i] = 1.0;
}
void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place,
......@@ -124,6 +132,38 @@ void StartServer(const std::string& rpc_name) {
server_thread.join();
}
void StartSendAndRecvServer(const std::string& rpc_name) {
framework::ProgramDesc program;
framework::Scope scope;
platform::CPUPlace place;
framework::Executor exe(place);
platform::CPUDeviceContext ctx(place);
auto block = AppendSendAndRecvBlock(&program);
std::string in_var_name("x");
std::vector<int> prefetch_block_ids{block->ID()};
auto prepared = exe.Prepare(program, prefetch_block_ids);
InitTensorsOnServer(&scope, &place, 10);
std::unordered_map<std::string,
std::shared_ptr<framework::ExecutorPrepareContext>>
grad_to_prepared_ctx;
grad_to_prepared_ctx[in_var_name] = prepared[0];
g_req_handler->SetProgram(&program);
g_req_handler->SetGradToPreparedCtx(&grad_to_prepared_ctx);
g_req_handler->SetDevCtx(&ctx);
g_req_handler->SetScope(&scope);
g_req_handler->SetExecutor(&exe);
g_rpc_service->RegisterRPC(rpc_name, g_req_handler.get());
g_req_handler->SetRPCServer(g_rpc_service.get());
std::thread server_thread(
std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get()));
server_thread.join();
}
TEST(COMPLETE, CPU) {
setenv("http_proxy", "", 1);
setenv("https_proxy", "", 1);
......@@ -147,3 +187,46 @@ TEST(COMPLETE, CPU) {
g_rpc_service.reset(nullptr);
g_req_handler.reset(nullptr);
}
TEST(SENDANDRECV, CPU) {
setenv("http_proxy", "", 1);
setenv("https_proxy", "", 1);
g_req_handler.reset(new distributed::RequestSendAndRecvHandler(
distributed::DistributedMode::kAsync));
g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1));
distributed::RPCClient* client =
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
PADDLE_ENFORCE_NE(client, nullptr,
platform::errors::InvalidArgument(
"Client Start Fail, Check Your Code & Env"));
std::thread server_thread(StartSendAndRecvServer,
distributed::kRequestSendAndRecv);
g_rpc_service->WaitServerReady();
int port = g_rpc_service->GetSelectedPort();
std::string ep = paddle::string::Sprintf("127.0.0.1:%d", port);
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
// create var on local scope
int64_t rows_numel = 10;
InitTensorsOnClient(&scope, &place, rows_numel);
std::string in_var_name("x");
std::string out_var_name("res");
client->AsyncSendAndRecv(ep, ctx, scope, in_var_name, out_var_name);
client->Wait();
auto var = scope.Var(out_var_name);
auto value = var->GetMutable<framework::LoDTensor>();
auto ptr = value->mutable_data<float>(place);
for (int64_t i = 0; i < rows_numel; ++i) {
EXPECT_EQ(ptr[i], 0.5);
}
g_rpc_service->ShutDown();
server_thread.join();
LOG(INFO) << "begin reset";
g_rpc_service.reset(nullptr);
g_req_handler.reset(nullptr);
}
......@@ -29,7 +29,7 @@ service SendRecvService {
rpc CheckpointNotify(VariableMessage) returns (VoidMessage) {}
rpc DistributeNotify(VariableMessage) returns (VoidMessage) {}
rpc SendAndRecvVariable(VariableMessage) returns (VariableMessage) {}
rpc GetMonomerVariable(VariableMessage) returns (VariableMessage) {}
rpc GetMonomerBarrier(VariableMessage) returns (VoidMessage) {}
}
......
......@@ -96,6 +96,13 @@ class VariableResponse {
return scope_->FindVar(meta_.varname());
}
framework::Variable* GetRecvVar() {
if (create_scope_) {
return local_scope_->Var(meta_.out_varname());
}
return scope_->FindVar(meta_.out_varname());
}
int GetTrainerId() { return static_cast<int>(meta_.trainer_id()); }
protected:
......
......@@ -268,7 +268,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
size_t num_blocks = program->Size();
PADDLE_ENFORCE_GE(num_blocks, 2,
"server program should have at least 2 blocks");
std::vector<int> block_list;
for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
block_list.push_back(blkid);
......@@ -295,6 +294,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
request_send_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
request_send_and_recv_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
while (true) {
if (rpc_service_->IsExit()) {
......@@ -394,6 +394,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
new distributed::RequestGetNoBarrierHandler());
request_notify_handler_.reset(
new distributed::RequestNotifyHandler(distributed_mode, fan_in));
request_send_and_recv_handler_.reset(
new distributed::RequestSendAndRecvHandler(distributed_mode));
rpc_service_->RegisterRPC(distributed::kRequestSend,
request_send_handler_.get(), rpc_send_thread_num);
......@@ -408,6 +410,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
request_get_no_barrier_handler_.get());
rpc_service_->RegisterRPC(distributed::kRequestNotify,
request_notify_handler_.get(), rpc_send_thread_num);
rpc_service_->RegisterRPC(distributed::kRequestSendAndRecv,
request_send_and_recv_handler_.get(),
rpc_get_thread_num);
auto optimize_blocks =
Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
......@@ -416,6 +421,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
"optimize blocks is less than 1. Optimize blocks "
"should be 1 at least on the pserver side."));
auto *program = optimize_blocks[0]->Program();
framework::Executor executor(dev_place);
std::shared_ptr<framework::ExecutorPrepareContext> ckpt_pre_context = nullptr;
......@@ -488,6 +494,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
f(request_checkpoint_handler_.get());
f(request_get_no_barrier_handler_.get());
f(request_notify_handler_.get());
f(request_send_and_recv_handler_.get());
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
signal(SIGINT, SignalHandler::StopAndExit);
......
......@@ -99,6 +99,8 @@ class ListenAndServOp : public framework::OperatorBase {
mutable std::shared_ptr<distributed::RequestHandler>
request_checkpoint_handler_;
mutable std::shared_ptr<distributed::RequestHandler> request_notify_handler_;
mutable std::shared_ptr<distributed::RequestHandler>
request_send_and_recv_handler_;
mutable std::shared_ptr<std::thread> server_thread_;
mutable std::vector<std::string> sparse_vars_;
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <future> // NOLINT
#include <ostream>
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/distributed/communicator.h"
#include "paddle/fluid/operators/distributed/communicator_common.h"
#include "paddle/fluid/operators/distributed/distributed.h"
#include "paddle/fluid/operators/distributed/parameter_send.h"
#include "paddle/fluid/operators/distributed_ops/send_recv_util.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class SendAndRecvKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& scope = ctx.scope();
const auto& place = ctx.GetPlace();
auto send_var_name = ctx.Attr<std::string>("send_var_name");
auto recv_var_name = ctx.Attr<std::string>("recv_var_name");
auto epmap = ctx.Attr<std::string>("endpoint");
auto trainer_id = ctx.Attr<int>("trainer_id");
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& context = *pool.Get(place);
distributed::RPCClient* rpc_client =
distributed::RPCClient::GetInstance<RPCCLIENT_T>(trainer_id);
VLOG(3) << "SendAndRecvOp Send_var_name: " << send_var_name
<< " Recv_var_name: " << recv_var_name;
distributed::VarHandlePtr rets = rpc_client->AsyncSendAndRecv(
epmap, context, scope, send_var_name, recv_var_name);
rets->Wait();
}
};
class SendAndRecvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(data_type, platform::CPUPlace());
}
};
class SendAndRecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "Tensor Input variable to be sent").AsDuplicable();
AddOutput("Out", "Tensor Output varibale to be recv").AsDuplicable();
AddAttr<std::string>("send_var_name", "Send Tensor's name")
.SetDefault(std::string(""));
AddAttr<std::string>("recv_var_name", "Recv Tensor's name")
.SetDefault(std::string(""));
AddAttr<int>("trainer_id", "trainer id from 0 ~ worker_num.").SetDefault(0);
AddAttr<std::string>("endpoint", "Server endpoint")
.SetDefault({"127.0.0.1:6164"});
AddComment(R"DOC(
SendAndRecv operator
    This operator will send variables to the listen_and_serve op at the parameter server,
    and receive a variable back from the parameter server into the sending variable's scope.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(send_and_recv, ops::SendAndRecvOp, ops::SendAndRecvOpMaker);
REGISTER_OP_CPU_KERNEL(
send_and_recv,
ops::SendAndRecvKernel<paddle::platform::CPUDeviceContext, float>)
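As an illustration of how the new send_and_recv operator could be configured programmatically, a hedged sketch using the OpDesc calls seen elsewhere in this commit; the variable names, endpoint, and helper name are placeholders:

#include <string>
#include "paddle/fluid/framework/op_desc.h"

// Build an OpDesc for the send_and_recv op registered above. The attributes
// mirror those defined in SendAndRecvOpMaker.
void BuildSendAndRecvOpDesc(paddle::framework::OpDesc* desc) {
  desc->SetType("send_and_recv");
  desc->SetInput("X", {"x"});       // variable sent to the parameter server
  desc->SetOutput("Out", {"res"});  // variable received back into local scope
  desc->SetAttr("send_var_name", std::string("x"));
  desc->SetAttr("recv_var_name", std::string("res"));
  desc->SetAttr("endpoint", std::string("127.0.0.1:6164"));
  desc->SetAttr("trainer_id", 0);
}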
......@@ -24,49 +24,69 @@ class AdadeltaOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"),
"Input(Param) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("Grad"),
"Input(Grad) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredGrad"),
"Input(AvgSquaredGrad) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredUpdate"),
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(
PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
platform::errors::InvalidArgument(
"Input(Param) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
platform::errors::InvalidArgument(
"Input(Grad) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("AvgSquaredGrad"), true,
platform::errors::InvalidArgument(
"Input(AvgSquaredGrad) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("AvgSquaredUpdate"), true,
platform::errors::InvalidArgument(
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Param").front() ==
framework::proto::VarType::LOD_TENSOR,
true,
platform::errors::InvalidArgument(
"The input var's type should be LoDTensor, but the received is %s",
ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front());
PADDLE_ENFORCE(
ctx->Inputs("Param").front(),
ctx->GetInputsVarType("Param").front()));
PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Grad").front() ==
framework::proto::VarType::LOD_TENSOR,
true,
platform::errors::InvalidArgument(
"The input var's type should be LoDTensor, but the received is %s",
ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front());
ctx->Inputs("Grad").front(),
ctx->GetInputsVarType("Grad").front()));
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
"Output(ParamOut) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("AvgSquaredGradOut"),
"Output(AvgSquaredGradOut) of AdadeltaOp should not be null.");
PADDLE_ENFORCE(
ctx->HasOutput("AvgSquaredUpdateOut"),
"Output(AvgSquaredUpdateOut) of AdadeltaOp should not be null.");
PADDLE_ENFORCE_EQ(
ctx->HasOutput("ParamOut"), true,
platform::errors::InvalidArgument(
"Output(ParamOut) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("AvgSquaredGradOut"), true,
platform::errors::InvalidArgument(
"Output(AvgSquaredGradOut) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("AvgSquaredUpdateOut"), true,
platform::errors::InvalidArgument(
"Output(AvgSquaredUpdateOut) of AdadeltaOp should not be null."));
auto param_dim = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"),
"param and grad input of AdadeltaOp should have same dimension");
PADDLE_ENFORCE_NE(framework::product(ctx->GetInputDim("AvgSquaredGrad")), 0,
PADDLE_ENFORCE_NE(
framework::product(ctx->GetInputDim("AvgSquaredGrad")), 0,
platform::errors::InvalidArgument(
"Maybe the Input variable AvgSquaredGrad has not "
"been initialized. You may need to confirm if you put "
"exe.run(startup_program) after optimizer.minimize "
"function.");
"function."));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredGrad"),
platform::errors::InvalidArgument(
"Param and AvgSquaredGrad input of AdadeltaOp "
"should have same dimension");
"should have same dimension"));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredUpdate"),
platform::errors::InvalidArgument(
"Param and AvgSquaredUpdate input of AdadeltaOp "
"should have same dimension");
"should have same dimension"));
ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("AvgSquaredGradOut", param_dim);
......
......@@ -24,17 +24,19 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type()));
framework::ToTypeName(param_var->Type())));
const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, "
"but the received is %s",
ctx.InputNames("Grad").front(),
framework::ToTypeName(grad_var->Type()));
framework::ToTypeName(grad_var->Type())));
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto avg_squared_grad_out_tensor =
......
......@@ -23,22 +23,27 @@ class TopkOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"),
"Input(X) of TopkOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Out"),
"Output(Out) of TopkOp should not be null.");
PADDLE_ENFORCE(ctx->HasOutput("Indices"),
"Output(Indices) of TopkOp should not be null.");
PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
platform::errors::InvalidArgument(
"Input(X) of TopkOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
platform::errors::InvalidArgument(
"Output(Out) of TopkOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Indices"), true,
platform::errors::InvalidArgument(
"Output(Indices) of TopkOp should not be null."));
auto input_dims = ctx->GetInputDim("X");
const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
PADDLE_ENFORCE_GE(input_dims.size(), 1, "input must have >= 1d shape");
PADDLE_ENFORCE_GE(input_dims.size(), 1, platform::errors::InvalidArgument(
"input must have >= 1d shape"));
if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k,
"input must have >= k columns");
PADDLE_ENFORCE_GE(
input_dims[input_dims.size() - 1], k,
platform::errors::InvalidArgument("input must have >= k columns"));
}
framework::DDim dims = input_dims;
......
......@@ -43,8 +43,9 @@ template <typename DeviceContext, typename T>
class TopkOpCUDAKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
"It must use CUDAPlace.");
PADDLE_ENFORCE_EQ(
platform::is_gpu_place(ctx.GetPlace()), true,
platform::errors::InvalidArgument("It must use CUDAPlace."));
auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out");
auto* indices = ctx.Output<Tensor>("Indices");
......
......@@ -206,9 +206,9 @@ void BindInferenceApi(py::module *m) {
BindMkldnnQuantizerConfig(m);
#endif
m->def("create_paddle_predictor",
&paddle::CreatePaddlePredictor<AnalysisConfig>);
&paddle::CreatePaddlePredictor<AnalysisConfig>, py::arg("config"));
m->def("create_paddle_predictor",
&paddle::CreatePaddlePredictor<NativeConfig>);
&paddle::CreatePaddlePredictor<NativeConfig>, py::arg("config"));
m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
}
......
......@@ -1399,6 +1399,9 @@ function main() {
local CMD=$1
local parallel_number=$2
init
if [ "$CMD" != "assert_file_approvals" ];then
python ${PADDLE_ROOT}/tools/summary_env.py
fi
case $CMD in
build_only)
cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number}
......
......@@ -30,8 +30,11 @@ __all__ = ["spawn"]
# dygraph parallel apis
__all__ += [
"init_parallel_env", "get_rank", "get_world_size", "prepare_context",
"ParallelEnv"
"init_parallel_env",
"get_rank",
"get_world_size",
"prepare_context",
"ParallelEnv",
]
# collective apis
......
......@@ -18,16 +18,15 @@ from .base.distributed_strategy import DistributedStrategy
from .base.fleet_base import Fleet
from .base.util_factory import UtilBase
from .dataset import *
#from . import metrics
__all__ = [
"DistributedStrategy",
"UtilBase",
"DatasetFactory",
"DatasetBase",
"InMemoryDataset",
"QueueDataset",
"UserDefinedRoleMaker",
"PaddleCloudRoleMaker",
"Fleet",
]
fleet = Fleet()
......
......@@ -17,6 +17,8 @@ from paddle.distributed.fleet.proto import distributed_strategy_pb2
from paddle.fluid.framework import Variable, set_flags, core
import google.protobuf.text_format
__all__ = ["DistributedStrategy"]
def get_msg_dict(msg):
res_dict = {}
......
......@@ -22,7 +22,7 @@ from .runtime_factory import RuntimeFactory
from .util_factory import UtilFactory
from paddle.fluid.wrapped_decorator import wrap_decorator
__all__ = ['Fleet']
#__all__ = ['Fleet']
def _inited_runtime_handler_(func):
......@@ -200,7 +200,8 @@ class Fleet(object):
bool: True if this is a node of server,
False if not.
"""
return self._role_maker.is_server()
return self._role_maker.is_server(
) or self._role_maker._is_heter_worker()
@property
def util(self):
......
......@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["MetaOptimizerFactory"]
from ..meta_optimizers import *
meta_optimizer_names = list(
......
......@@ -14,15 +14,17 @@
"""Defination of Role Makers."""
import os
import numpy as np
import warnings
from multiprocessing import Process, Manager
import paddle.fluid as fluid
__all__ = ['RoleMakerBase', 'UserDefinedRoleMaker', 'PaddleCloudRoleMaker']
#__all__ = ['UserDefinedRoleMaker', 'PaddleCloudRoleMaker']
class Role:
WORKER = 1
SERVER = 2
HETER_WORKER = 3
class RoleMakerBase(object):
......@@ -40,6 +42,11 @@ class RoleMakerBase(object):
self._role = None
self._current_id = -1
# for heter parameter server mode
self._heter_trainer_endpoints = []
self._heter_trainer_device = "CPU"
self._is_heter_parameter_server_mode = False
self._node_type = None
self._node_type_comm = None
self._all_comm = None
......@@ -163,12 +170,58 @@ class RoleMakerBase(object):
"""
print("warning: RoleMakerBase does not have barrier worker.")
def _is_heter_worker(self):
"""
Return is_heter_worker() of current process
"""
warnings.warn("RoleMakerBase does not have function: _is_heter_worker.")
return False
def _heter_worker_num(self):
"""
Get current total heter-worker number.
Returns:
int: heter_worker number
"""
warnings.warn(
"RoleMakerBase does not have function: _heter_worker_num.")
return 0
def _get_heter_worker_endpoints(self):
"""
Returns:
list: endpoints of all heter_trainers
"""
assert self._heter_trainer_endpoints != []
return self._heter_trainer_endpoints
def _get_heter_worker_endpoint(self):
"""
Returns:
string: the corresponding heter_trainer's endpoint, chosen as
heter_trainer_endpoints[(current_id + 1) % heter_worker_num]
e.g: if we have 4 cpu-trainers (default) and 2 gpu-trainers (heter),
then No.0 and No.2 cpu-trainers will work with No.1 gpu-trainer,
and No.1 and No.3 cpu-trainers will work with No.0 gpu-trainer
"""
assert self._heter_trainer_endpoints != []
return self._heter_trainer_endpoints[(self._current_id + 1) %
self._heter_worker_num()]
def _get_heter_worker_device(self):
"""
Returns:
string: heter_trainer's device of current node, e.g: CPU/GPU/XPU
"""
return self._heter_trainer_device.upper()
class PaddleCloudRoleMaker(RoleMakerBase):
def __init__(self, is_collective=False, **kwargs):
super(PaddleCloudRoleMaker, self).__init__()
self._is_collective = is_collective
self._init_gloo = False #default no init gloo
self._init_gloo = False # default no init gloo
self._kwargs = kwargs
self._role_is_generated = False
......@@ -278,10 +331,7 @@ class PaddleCloudRoleMaker(RoleMakerBase):
"""
get index of current node
"""
if self.is_server():
return self.server_index()
elif self.is_worker():
return self.worker_index()
return self._current_id
def worker_num(self):
"""
......@@ -323,6 +373,22 @@ class PaddleCloudRoleMaker(RoleMakerBase):
self.generate_role()
return self._server_endpoints
def _heter_worker_num(self):
"""
get the number of heter workers
"""
if not self._role_is_generated:
self.generate_role()
return self._heter_trainers_num
def _is_heter_worker(self):
"""
whether current process is heter worker
"""
if not self._role_is_generated:
self.generate_role()
return self._role == Role.HETER_WORKER
def _get_rank(self):
"""
get current rank in all workers and pservers
......@@ -342,17 +408,47 @@ class PaddleCloudRoleMaker(RoleMakerBase):
def _ps_env(self):
try:
# Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set
# format: string(ip:port), eg. 127.0.0.1:6001
self._server_endpoints = os.environ[
"PADDLE_PSERVERS_IP_PORT_LIST"].split(",")
# format: string(ip:port,ip:port), eg. 127.0.0.1:6001,127.0.0.1:6002
self._server_endpoints = os.getenv("PADDLE_PSERVERS_IP_PORT_LIST",
"").split(",")
assert self._server_endpoints != [""]
self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS",
"").split(",")
assert self._worker_endpoints != [""]
trainers_num = int(os.environ["PADDLE_TRAINERS_NUM"])
training_role = os.environ["TRAINING_ROLE"]
if training_role not in ["TRAINER", "PSERVER"]:
raise ValueError("TRAINING_ROLE must be PSERVER or TRAINER")
if training_role not in ["TRAINER", "PSERVER", "HETER_TRAINER"]:
raise ValueError(
"TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER, but get {}, please check your environment.".
format(training_role))
# For heter parameter server env setting
heter_trainer_eplist = os.getenv(
"PADDLE_HETER_TRAINER_IP_PORT_LIST", None)
heter_trainer_device = os.getenv("PADDLE_HETER_TRAINER_DEVICE",
None)
if heter_trainer_eplist and heter_trainer_device:
try:
heter_trainer_eplist = os.environ[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"].split(",")
except:
raise ValueError(
"Can not Find PADDLE_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' ."
)
self._is_heter_parameter_server_mode = True
heter_trainers_num = len(heter_trainer_eplist)
current_node_device = heter_trainer_device.upper()
if current_node_device not in ["CPU", "GPU", "XPU"]:
raise ValueError(
"Heter Trainer doesn't support {} device now, please use CPU / GPU / XPU(KunLun)".
format(heter_trainer_device))
self._heter_trainer_device = current_node_device
else:
self._is_heter_parameter_server_mode = False
heter_trainers_num = 0
if training_role == "TRAINER":
role = Role.WORKER
......@@ -365,17 +461,26 @@ class PaddleCloudRoleMaker(RoleMakerBase):
ip = os.environ["POD_IP"]
self._cur_endpoint = ip + ":" + port
current_id = self._server_endpoints.index(self._cur_endpoint)
elif training_role == "HETER_TRAINER":
role = Role.HETER_WORKER
cur_ip = os.environ["POD_IP"]
cur_port = os.environ["PADDLE_PORT"]
curr_endpoint = ":".join([cur_ip, cur_port])
current_id = heter_trainer_eplist.index(curr_endpoint)
else:
raise ValueError("TRAINING_ROLE must be PSERVER or TRAINER")
except ValueError as ve:
raise ValueError(
"something wrong with PaddleCloud, please check environment")
"TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER")
except ValueError as e:
raise ValueError(
"Something wrong with PaddleCloud, please check environment")
self._trainers_num = trainers_num
self._role = role
self._current_id = current_id
self._node_num = len(
set([x.split(':')[0] for x in self._worker_endpoints]))
self._heter_trainers_num = heter_trainers_num
self._heter_trainer_endpoints = heter_trainer_eplist
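A minimal sketch of the environment a heter parameter-server job would provide for the parsing above; the variable names come from the code, while the addresses, ports and counts are placeholders:

# Illustrative environment for a HETER_TRAINER process; all values are placeholders.
import os

os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:6001,127.0.0.1:6002"
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:6003,127.0.0.1:6004"
os.environ["PADDLE_TRAINERS_NUM"] = "2"
os.environ["TRAINING_ROLE"] = "HETER_TRAINER"              # or "TRAINER" / "PSERVER"
os.environ["PADDLE_HETER_TRAINER_IP_PORT_LIST"] = "127.0.0.1:6005"
os.environ["PADDLE_HETER_TRAINER_DEVICE"] = "gpu"          # cpu / gpu / xpu
os.environ["POD_IP"] = "127.0.0.1"                         # used by PSERVER / HETER_TRAINER
os.environ["PADDLE_PORT"] = "6005"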
def _collective_env(self):
self._current_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
......
......@@ -15,24 +15,10 @@ from .amp_optimizer import AMPOptimizer
from .recompute_optimizer import RecomputeOptimizer
from .gradient_merge_optimizer import GradientMergeOptimizer
from .graph_execution_optimizer import GraphExecutionOptimizer
from .async_optimizer import AsyncMetaOptimizer
from .parameter_server_optimizer import ParameterServerOptimizer
from .pipeline_optimizer import PipelineOptimizer
from .localsgd_optimizer import LocalSGDOptimizer
from .lars_optimizer import LarsOptimizer
from .async_graph_execution_optimizer import AsyncGraphExecutionOptimizer
from .parameter_server_graph_optimizer import ParameterServerGraphOptimizer
from .dgc_optimizer import DGCOptimizer
from .lamb_optimizer import LambOptimizer
__all__ = [
'AMPOptimizer',
'RecomputeOptimizer',
'GradientMergeOptimizer',
'AsyncMetaOptimizer',
'GraphExecutionOptimizer',
'PipelineOptimizer',
'LocalSGDOptimizer',
'LarsOptimizer',
'AsyncGraphExecutionOptimizer',
'DGCOptimizer',
'LambOptimizer',
]
......@@ -14,8 +14,6 @@
import paddle.fluid.contrib.mixed_precision as mixed_precision
from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["AMPOptimizer"]
class AMPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = ["DGCOptimizer"]
class DGCOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -14,10 +14,6 @@
from paddle.fluid.optimizer import GradientMergeOptimizer as GM
from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["GradientMergeOptimizer"]
# amp + gradient merge + lamb
class GradientMergeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -16,8 +16,6 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = ["LambOptimizer"]
class LambOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase
import logging
__all__ = ["LarsOptimizer"]
class LarsOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["MetaOptimizerBase"]
from paddle.fluid.optimizer import Optimizer
......
......@@ -13,12 +13,12 @@
from paddle import fluid
from paddle.fluid import compiler
from .async_optimizer import AsyncMetaOptimizer
from .parameter_server_optimizer import ParameterServerOptimizer
class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer):
class ParameterServerGraphOptimizer(ParameterServerOptimizer):
def __init__(self, optimizer):
super(AsyncGraphExecutionOptimizer, self).__init__(optimizer)
super(ParameterServerGraphOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......@@ -31,6 +31,9 @@ class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer):
if self.role_maker.is_server():
return False
if self.role_maker._is_heter_parameter_server_mode:
return False
return True
def _disable_strategy(self, dist_strategy):
......
......@@ -15,9 +15,9 @@ from paddle import fluid
from .meta_optimizer_base import MetaOptimizerBase
class AsyncMetaOptimizer(MetaOptimizerBase):
class ParameterServerOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
super(AsyncMetaOptimizer, self).__init__(optimizer)
super(ParameterServerOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = []
......@@ -68,6 +68,21 @@ class AsyncMetaOptimizer(MetaOptimizerBase):
_startup = worker.init_from_server_pass(_startup, compiled_config)
_startup = worker.delet_extra_optimizes_pass(_startup,
compiled_config)
# for heter program
if self.role_maker._is_heter_parameter_server_mode:
from paddle.fluid.incubate.fleet.parameter_server.ir import heter_trainer_pass as heter_worker
if self.role_maker._is_heter_worker():
# for heter worker
_main = heter_worker.split_heter_worker_ops_pass(
_main, compiled_config)
else:
# for default worker
_main = heter_worker.split_trainer_ops_pass(_main,
compiled_config)
# for startup change
_startup = heter_worker.delete_startup_useless_ops_var_pass(
_startup, _main, compiled_config)
else:
_main = worker.append_send_ops_pass(_main, compiled_config)
_startup = _startup
......@@ -129,9 +144,12 @@ class AsyncMetaOptimizer(MetaOptimizerBase):
_origin_startup_program,
strategy, self.role_maker)
main_program, startup_program = \
self._build_trainer_programs(compiled_config) if self.role_maker.is_worker() \
else self._build_pserver_programs(compiled_config)
if self.role_maker.is_worker() or self.role_maker._is_heter_worker():
main_program, startup_program = self._build_trainer_programs(
compiled_config)
elif self.role_maker.is_server():
main_program, startup_program = self._build_pserver_programs(
compiled_config)
loss.block.program = main_program
fluid.framework.switch_startup_program(startup_program)
......
......@@ -20,8 +20,6 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO
from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
__all__ = ["PipelineOptimizer"]
class PipelineHelper(CollectiveHelper):
def __init__(self, role_maker, nrings=1, wait_port='6174'):
......
......@@ -14,8 +14,6 @@
from paddle.fluid.optimizer import RecomputeOptimizer as RO
from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["RecomputeOptimizer"]
class RecomputeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer):
......
......@@ -11,3 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .metric import *
__all__ = [
"sum",
"max",
"min",
"auc",
"mae",
"rmse",
"mse",
"acc",
]
......@@ -14,5 +14,3 @@
from .collective_runtime import CollectiveRuntime
from .parameter_server_runtime import ParameterServerRuntime
__all__ = ["CollectiveRuntime," "ParameterServerRuntime", ]
......@@ -196,6 +196,18 @@ class ParameterServerRuntime(RuntimeBase):
else:
warnings.warn("communicator has been initialized, skip")
def _get_executor(self):
if self.role_maker._is_heter_worker():
if self.role_maker._get_heter_worker_device() == "GPU":
gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))
executor = Executor(fluid.CUDAPlace(gpu_id))
else:
raise ValueError("Not Support Device {}".format(
self.role_maker._get_heter_worker_device()))
else:
executor = fluid.Executor(fluid.CPUPlace())
return executor
def _init_server(self, *args, **kwargs):
if len(args) > 1:
raise ValueError("init server can only accept 1 args: `dirname`")
......@@ -204,9 +216,15 @@ class ParameterServerRuntime(RuntimeBase):
else:
model_dirname = None
executor = fluid.Executor(fluid.CPUPlace())
if self.role_maker._is_heter_worker():
self._init_worker()
executor = self._get_executor()
executor.run(fluid.default_startup_program())
if self.role_maker._is_heter_worker():
return
if not model_dirname:
return
......@@ -237,12 +255,12 @@ class ParameterServerRuntime(RuntimeBase):
# self._load_sparse_params(dirname=model_dir, varnames=distribtued_varnames)
def _run_server(self):
executor = fluid.Executor(fluid.CPUPlace())
executor = self._get_executor()
executor.run(fluid.default_main_program())
def _stop_worker(self):
self._communicator.stop()
executor = fluid.Executor(fluid.CPUPlace())
executor = self._get_executor()
executor.close()
def _get_optimizer_status(self, op, param_name):
......
......@@ -15,4 +15,4 @@
from .fs import *
from .http_server import KVHandler, KVHTTPServer, KVServer
__all__ = ['KVHandler', 'KVHTTPServer', 'KVServer'] + fs.__all__
#__all__ = ['KVHandler', 'KVHTTPServer', 'KVServer'] + fs.__all__
......@@ -145,7 +145,7 @@ class Fleet(object):
Returns:
bool: True if this is a node of server,
False if not.
False if not
"""
return self._role_maker.is_server()
......
......@@ -343,7 +343,6 @@ class MPISymetricRoleMaker(MPIRoleMaker):
def get_pserver_endpoints(self):
"""
get pserver endpoints
Returns:
endpoints(list): pserver endpoints
"""
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import warnings
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.transpiler.details.program_utils import delete_ops
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_heter_ops
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import create_heter_program
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import create_trainer_program
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_block_joints
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_op_input_output
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import get_vars_name_in_block
def split_heter_worker_ops_pass(program, config):
"""
split the heter worker program from the origin program
1. find heter ops (ops placed on a different device)
2. find the inputs & outputs of every heter-block
3. create the heter worker program and add the listen_and_serv op
"""
default_deveice = "cpu"
program, heter_ops, _, program_block_ops = find_heter_ops(program,
default_deveice)
if len(heter_ops) == 0:
warnings.warn(
"Currently running in Heter Parameter Server mode, but no OP running on heterogeneous devices, Please check your code."
)
return program
current_device = "gpu"
if current_device not in heter_ops:
raise ValueError("Op which run on device {} not exist.".format(
current_device))
block_vars_detail = find_block_joints(program, program_block_ops, heter_ops)
heter_program = framework.Program()
create_heter_program(program, config, heter_program, heter_ops,
block_vars_detail, current_device)
return heter_program
def split_trainer_ops_pass(program, config):
"""
split the cpu-trainer program from the origin program
1. find heter ops (ops placed on a different device)
2. find the inputs & outputs of every heter-block
3. create the cpu-trainer program and add send & recv ops
"""
# TODO(MrChengmo): support user-defined default_device
default_deveice = "cpu"
program, heter_ops, _, program_block_ops = find_heter_ops(program,
default_deveice)
block_vars_detail = find_block_joints(program, program_block_ops, heter_ops)
create_trainer_program(program, config, heter_ops, block_vars_detail)
return program
def delete_startup_useless_ops_var_pass(startup_program, main_program, config):
"""
delete variables which are not used in the current main_program
"""
# find all op and its var
vars_in_main_program = get_vars_name_in_block(main_program.global_block())
block_nums = startup_program.num_blocks
for block_index in range(1, block_nums):
current_block = startup_program.block(block_index)
# delete useless op
need_delete_op = []
for op in current_block.ops:
inputs, outputs = find_op_input_output(startup_program,
current_block, op)
inputs += outputs
# Todo: delete some concat op
if not list(set(inputs) & set(vars_in_main_program)):
need_delete_op.append(op)
delete_ops(current_block, need_delete_op)
# delete useless var
for var in current_block.vars:
if var.name not in vars_in_main_program:
startup_program._remove_var(var.name)
return startup_program
......@@ -12,33 +12,23 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright(c) 2020 PaddlePaddle Authors.All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http: // www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from functools import reduce
import collections
import math
import os
import warnings
import six
import paddle.fluid as fluid
from paddle.fluid import core
from paddle.fluid.core import CommContext
import paddle.fluid.framework as framework
from paddle.fluid.incubate.fleet.parameter_server.mode import DistributedMode
from paddle.fluid.incubate.fleet.parameter_server.ir import vars_metatools
from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import RoundRobin, PSDispatcher
from paddle.fluid.transpiler.details.program_utils import delete_ops
OP_NAME_SCOPE = "op_namescope"
CLIP_OP_NAME_SCOPE = "@CLIP"
......@@ -122,9 +112,20 @@ class MergedVariable:
self.offsets = offsets
def Singleton(cls):
_instance = {}
def _singleton(*args, **kargs):
if cls not in _instance:
_instance[cls] = cls(*args, **kargs)
return _instance[cls]
return _singleton
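A quick illustration of the Singleton decorator defined above, using a hypothetical class (not part of this change) to show that repeated construction returns the cached instance:

@Singleton
class _Demo(object):  # hypothetical class, for illustration only
    def __init__(self, name):
        self.name = name

a = _Demo("first")
b = _Demo("second")
assert a is b              # the second call returns the cached instance
assert a.name == "first"   # __init__ only ran for the first construction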
@Singleton
class CompileTimeStrategy(object):
def __init__(self, main_program, startup_program, strategy, role_maker):
self.min_block_size = 8192
self.origin_main_program = main_program
......@@ -177,6 +178,12 @@ class CompileTimeStrategy(object):
def get_ps_endpoints(self):
return self.role_maker.get_pserver_endpoints()
def get_heter_worker_endpoints(self):
return self.role_maker._get_heter_worker_endpoints()
def get_heter_worker_endpoint(self):
return self.role_maker._get_heter_worker_endpoint()
def get_origin_programs(self):
return self.origin_main_program, self.origin_startup_program
......@@ -810,6 +817,30 @@ class CompileTimeStrategy(object):
return sparse_param_grads, dense_param_grads
def remove_var_pair_by_grad(self, var_name):
for index, pair in enumerate(self.merged_variables_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_variables_pairs[index]
for index, pair in enumerate(self.merged_dense_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_dense_pairs[index]
return
for index, pair in enumerate(self.merged_sparse_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_sparse_pairs[index]
return
print("Not find {} in self.merge_pairs".format(var_name))
def _is_opt_role_op(op):
# NOTE : depend on oprole to find out whether this op is for
......
......@@ -13,7 +13,13 @@
# limitations under the License.
from __future__ import print_function
import six
import collections
import warnings
import math
from functools import reduce
import paddle.fluid as fluid
import paddle.fluid.core as core
import paddle.fluid.framework as framework
......@@ -34,6 +40,10 @@ LR_SCHED_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.LRSched
OPT_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.Optimize
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
DEVICE_LIST = ["cpu", "gpu", "xpu"]
COMMUNICATE_OPS_TYPE = ["send", "recv", "fetch_barrier", "send_barrier"]
DEFAULT_DEVICE = 'cpu'
def delete_optimizer_pass(program, config):
def _delete_optimizer_op_and_vars(_program, optimize_ops):
......@@ -250,7 +260,7 @@ def fake_init_ops_pass(program, config):
return list(set(dist_varnames + sparse_varnames))
def _fake_init_sparsetable(sparse_table_names):
#delete table init op
# delete table init op
for table_name in sparse_table_names:
table_var = program.global_block().vars[table_name]
table_param_init_op = []
......@@ -307,3 +317,871 @@ def delet_extra_optimizes_pass(program, config):
program.global_block()._remove_var(var)
return program
def find_heter_ops(program, default_device="cpu"):
if default_device not in DEVICE_LIST:
raise ValueError("Given device {} is not in device list {}".format(
default_device, DEVICE_LIST))
def _is_heter_op(op, current_heter_device, default_device="cpu"):
heter_devices = list(DEVICE_LIST)
heter_devices.remove(default_device)
op_device = op.attr("op_device")
op_type = op.type
if op_device in heter_devices:
return True
elif op_type in COMMUNICATE_OPS_TYPE and current_heter_device != default_device:
# for distributed communicate ops: send & recv & barrier, etc.
# TODO: this method needs to be updated
op._set_attr('op_device', current_heter_device)
return True
elif op_device is None or op_device == default_device:
op._set_attr('op_device', default_device)
return False
return False
def _is_same_device(op, pre_device, default_device="cpu"):
op_device = op.attr("op_device")
if op_device == pre_device:
return True
if pre_device == default_device:
return True
return False
def _append_heter_op(op, current_heter_block_ops, heter_ops):
op_device = op.attr("op_device")
if op_device not in heter_ops:
heter_ops[op_device] = {}
current_heter_block_ops.append(op)
origin_program = program.clone()
block = program.global_block()
program_block_ops = []
default_ops = {default_device: {}}
heter_ops = {}
block_index = 0
# heter_ops: {"gpu": {1:[op1, op2, ...], 2:[op1, op2, ...] }; "xpu": {3:[op1, op2, ...], 4:[op1, op2, ...] }}
current_heter_block_ops = []
current_default_block_ops = []
current_heter_device = default_device
is_heter = False
for op in block.ops:
if _is_heter_op(op, current_heter_device, default_device):
# for gpu/xpu-op
is_heter = True
# for cpu-op block append
if len(current_default_block_ops) > 1:
default_ops[default_device][
block_index] = current_default_block_ops
program_block_ops.append(current_default_block_ops)
current_default_block_ops = []
block_index += 1
if _is_same_device(op, current_heter_device, default_device):
# for gpu-op, gpu-op -> gpu-op,...
current_heter_device = op.attr("op_device")
_append_heter_op(op, current_heter_block_ops, heter_ops)
else:
# for gpu-op -> xpu-op, ...
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
block_index += 1
current_heter_block_ops = []
current_heter_device = op.attr("op_device")
_append_heter_op(op, current_heter_block_ops, heter_ops)
elif is_heter:
# for gpu/xpu-op -> cpu-op
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
block_index += 1
current_heter_block_ops = []
current_heter_device = default_device
is_heter = False
current_default_block_ops.append(op)
else:
# for cpu-op
current_default_block_ops.append(op)
if current_default_block_ops != []:
default_ops[default_device][block_index] = current_default_block_ops
program_block_ops.append(current_default_block_ops)
if current_heter_block_ops != []:
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
if len(heter_ops) == 0:
warnings.warn(
"No heterogeneous OP was found in your program , "
" please using fluid.device_guard() to run OPs on different device.")
total_heter_ops = 0
heter_blocks = 0
for device in heter_ops.keys():
heter_block_dict = heter_ops[device]
heter_blocks += len(heter_block_dict)
for _, heter_block in heter_block_dict.items():
total_heter_ops += len(heter_block)
print(
"There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks.".
format(len(block.ops), total_heter_ops, heter_blocks))
return origin_program, heter_ops, default_ops, program_block_ops
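For intuition, a hand-traced result of the splitting above for a toy program whose six ops were placed via device_guard as [cpu, cpu, gpu, gpu, cpu, cpu] (op names are hypothetical):

# default_device = "cpu"; ops placed as: op0 cpu, op1 cpu, op2 gpu, op3 gpu, op4 cpu, op5 cpu
# heter_ops         -> {"gpu": {1: [op2, op3]}}
# default_ops       -> {"cpu": {0: [op0, op1], 2: [op4, op5]}}
# program_block_ops -> [[op0, op1], [op2, op3], [op4, op5]]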
def create_heter_program(program, config, heter_program, heter_ops,
block_var_detail, current_device):
# add heter op
optimizer_block = []
grad_to_block_id = []
send_grad_var_list = []
pre_block_idx = heter_program.num_blocks - 1
for index, heter_block_ops in heter_ops[current_device].items():
heter_block = heter_program._create_block(pre_block_idx)
optimizer_block.append(heter_block)
for _, op in enumerate(heter_block_ops):
block_append_op(heter_program, program, heter_block, op)
# add relate variables
inputs = _get_input_map_from_op(program.global_block().vars, op)
add_vars_by_op_map(inputs, heter_program)
outputs = _get_output_map_from_op(program.global_block().vars, op)
add_vars_by_op_map(outputs, heter_program)
entrance_vars = block_var_detail[index]["entrance"]
add_vars_by_var_list(entrance_vars, program, heter_program)
exit_vars = block_var_detail[index]["exit"]
add_vars_by_var_list(exit_vars, program, heter_program)
comm_info = get_communicate_var_info(program, index, entrance_vars,
exit_vars)
grad_to_block_id.append(comm_info["block_input_var_name"] + ":" + str(
heter_block.idx))
# create slice op
first_op_index = 0
get_type_var_name = comm_info["input_var_reshape_name"][0].split(
".input_reshape@Heter")[0]
get_type_var = heter_program.global_block().vars[get_type_var_name]
insert_recv_slice_op(
heter_program, heter_block, first_op_index,
comm_info["block_input_var_name"],
(-1, sum(comm_info["input_var_reshape_dim"])), get_type_var.dtype,
get_type_var.type, comm_info["input_var_reshape_name"], [
(-1, comm_info["input_var_reshape_dim"][i])
for i in range(len(comm_info["input_var_reshape_dim"]))
])
first_op_index += len(comm_info["input_var_reshape_dim"])
# create reshape op
for i in range(len(comm_info["input_var_reshape_name"])):
var_name = entrance_vars[i]
insert_reshape_op(
heter_program,
heter_block,
first_op_index,
comm_info["input_var_reshape_name"][i],
var_name, )
first_op_index += 1
first_op_index = len(heter_block.ops)
# create send reshape op
for i in range(len(exit_vars)):
insert_reshape_op(heter_program, heter_block, first_op_index,
exit_vars[i],
comm_info["output_var_reshape_name"][i],
[-1, comm_info["output_var_reshape_dim"][i]])
first_op_index += 1
# create send concat op
insert_send_concat_op(heter_program, heter_block, first_op_index,
comm_info["output_var_reshape_name"],
comm_info["block_output_var_name"],
[-1, sum(comm_info["output_var_reshape_dim"])])
check_op_device(heter_block, current_device)
send_grad_var_list = send_grad_var_list + add_heter_send_op(
program, heter_program, heter_block, block_var_detail[index])
# add step counter
send_input_vars = []
dummy_output = []
trainer_id = config.get_role_id()
pserver_endpoints = config.get_ps_endpoints()
optimizer_block[-1].append_op(
type="send",
inputs={"X": send_input_vars},
outputs={"Out": dummy_output},
attrs={
"send_varnames": [STEP_COUNTER],
"merge_add": True,
"use_send_handler": False,
"endpoints": pserver_endpoints
})
# add info in listen&serv
attrs = {
"grad_to_block_id": grad_to_block_id,
"sparse_grad_to_param": None,
"lr_decay_block_id": None,
"dense_optimize_blocks": None,
"sparse_optimize_blocks": None,
"optimize_blocks": optimizer_block,
# runtime attribute
"endpoint": config.get_heter_worker_endpoint(),
"pserver_id": config.get_role_id(),
"Fanin": config.get_trainers(),
"distributed_mode": config.get_distributed_mode(),
"rpc_get_thread_num": 12,
"rpc_send_thread_num": 12,
"rpc_prefetch_thread_num": 12
}
# append the listen_and_serv op
heter_program.global_block().append_op(
type="listen_and_serv", inputs={'X': []}, outputs={}, attrs=attrs)
check_heter_compile_time_strategy(program, config, send_grad_var_list)
def check_heter_compile_time_strategy(program, config, send_grad_var_list):
origin_grad_var_list = []
for _, var_grad in config.merged_variables_pairs:
origin_grad_var_list.append(var_grad.merged_var.name)
origin_grad_var_list = list(set(origin_grad_var_list))
send_grad_var_list = list(set(send_grad_var_list))
useless_grad_var_list = list(
set(origin_grad_var_list) - set(send_grad_var_list))
for useless_grad_var in useless_grad_var_list:
config.remove_var_pair_by_grad(useless_grad_var)
def create_trainer_program(program, config, heter_ops, block_var_detail):
for device in heter_ops.keys():
for heter_block_index in sorted(heter_ops[device]):
replace_ops_by_communicate_op(program, config, heter_block_index,
heter_ops[device][heter_block_index],
block_var_detail)
remove_trainer_send_op(program, config, heter_block_index,
block_var_detail)
deleter_trainer_useless_var(program)
check_op_device(program.global_block(), DEFAULT_DEVICE)
def replace_ops_by_communicate_op(program, config, heter_block_index, ops_list,
block_var_detail):
all_op = program.global_block().ops
start_op = ops_list[0]
first_op_idx = -1
for op in all_op:
if is_same_op(op, start_op):
first_op_idx = all_op.index(op)
break
assert first_op_idx != -1
delete_same_ops(program.global_block(), ops_list)
mode = config.get_distributed_mode()
heter_worker_endpoint = config.get_heter_worker_endpoint()
entrance_var = block_var_detail[heter_block_index]["entrance"]
exit_var = block_var_detail[heter_block_index]["exit"]
default_device_comm_info = get_communicate_var_info(
program, heter_block_index - 1,
block_var_detail[heter_block_index - 1]["entrance"],
block_var_detail[heter_block_index - 1]["exit"])
comm_info = get_communicate_var_info(program, heter_block_index,
entrance_var, exit_var)
# create reshape op
for i in range(len(entrance_var)):
insert_reshape_op(
program,
program.global_block(), first_op_idx, entrance_var[i],
default_device_comm_info["output_var_reshape_name"][i],
[-1, default_device_comm_info["output_var_reshape_dim"][i]])
first_op_idx += 1
# create concat op
insert_send_concat_op(
program,
program.global_block(), first_op_idx,
default_device_comm_info["output_var_reshape_name"],
default_device_comm_info["block_output_var_name"],
[-1, sum(default_device_comm_info["output_var_reshape_dim"])])
first_op_idx += 1
# create send op
send_input_vars = [
program.global_block().vars[default_device_comm_info[
"block_output_var_name"]]
]
get_type_var_name = comm_info["output_var_reshape_name"][0].split(
".output_reshape@Heter")[0]
get_type_var = program.global_block().vars[get_type_var_name]
program.global_block().create_var(
name=comm_info["block_output_var_name"],
shape=(-1, sum(comm_info["output_var_reshape_dim"])),
dtype=get_type_var.dtype,
type=get_type_var.type)
recv_vars = [
program.global_block().vars[comm_info["block_output_var_name"]]
]
program.global_block()._insert_op(
index=first_op_idx,
type="send_and_recv",
inputs={"X": send_input_vars},
outputs={"Out": recv_vars},
attrs={
"send_var_name": default_device_comm_info["block_output_var_name"],
"recv_var_name": comm_info["block_output_var_name"],
"endpoint": heter_worker_endpoint,
"trainer_id": config.get_role_id(),
RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE
})
first_op_idx += 1
# recv
# create slice op
insert_recv_slice_op(
program,
program.global_block(), first_op_idx,
comm_info["block_output_var_name"],
(-1, sum(comm_info["output_var_reshape_dim"])), get_type_var.dtype,
get_type_var.type, comm_info["output_var_reshape_name"], [
(-1, comm_info["output_var_reshape_dim"][i])
for i in range(len(comm_info["output_var_reshape_dim"]))
])
first_op_idx += len(comm_info["output_var_reshape_dim"])
# create reshape op
for i in range(len(comm_info["output_var_reshape_name"])):
var_name = comm_info["output_var_reshape_name"][i].split(
".output_reshape@Heter")[0]
insert_reshape_op(
program,
program.global_block(),
first_op_idx,
comm_info["output_var_reshape_name"][i],
var_name, )
first_op_idx += 1
def remove_trainer_send_op(program, config, heter_block_index,
block_var_detail):
# if the trainer does FF->BP->SEND, it has the following vars: var, var@GRAD
# if the trainer only does SEND, it has one var: var@GRAD
# delete the send op if the trainer does not have the paired var (var <-> var@GRAD)
persistables = block_var_detail[heter_block_index]["persistables"]
need_remove_send_op = []
need_remove_grad_var = []
for op in find_send_op(program):
input_list, _ = find_op_input_output(program,
program.global_block(), op)
for var_name in input_list:
origin_var_name = var_name.split("@GRAD")[0]
if origin_var_name in persistables:
need_remove_send_op.append(op)
need_remove_grad_var.append(var_name)
need_remove_send_op = list(set(need_remove_send_op))
delete_ops(program.global_block(), need_remove_send_op)
for grad_var_name in need_remove_grad_var:
config.remove_var_pair_by_grad(grad_var_name)
def add_heter_send_op(program, heter_program, block, block_var_detail):
def _get_send_op_dict():
send_op_dict = {}
send_op_list = find_send_op(program)
for op in send_op_list:
input_list, _ = find_op_input_output(program,
program.global_block(), op)
for var in input_list:
send_op_dict[var] = op
return send_op_dict
send_grad_var_list = []
send_op_dict = _get_send_op_dict()
for persistable_var in block_var_detail["persistables"]:
# check var_name == var@GRAD
if "@GRAD" not in persistable_var:
continue
if "GRAD" != persistable_var.split("@")[-1]:
continue
if persistable_var not in send_op_dict:
continue
block_append_op(program, heter_program, block,
send_op_dict[persistable_var])
send_grad_var_list.append(persistable_var)
return send_grad_var_list
def find_send_op(program):
send_op_list = []
for op in program.global_block().ops:
if op.type == "send":
send_op_list.append(op)
return send_op_list
def get_communicate_var_info(program, block_index, entrance_var_list,
exit_var_list):
input_var_reshape_dim = []
input_var_reshape_name = []
block_input_var_name = "joint_{}_{}@Heter".format(block_index - 1,
block_index)
output_var_reshape_dim = []
output_var_reshape_name = []
block_output_var_name = "joint_{}_{}@Heter".format(block_index,
block_index + 1)
entrance_var_list.sort()
exit_var_list.sort()
# input
# Heter_SERVER_BLOCK_index@JOINT_VAR -> slice -> var@Heter_SERVER_BLOCK@INPUT_RESHAPE_VAR -> reshape -> var
for name in entrance_var_list:
var = program.global_block().vars[name]
shape = var.shape
if len(shape) < 2 or shape[0] != -1:
raise ValueError(
"Variable {} not support heter training. its shape is {}".
format(name, shape))
recv_var_dim = -1 * reduce(lambda x, y: x * y, shape)
input_var_reshape_dim.append(recv_var_dim)
input_var_reshape_name.append("{}.input_reshape@Heter".format(name))
# output
# var -> reshape -> var@Heter_SERVER_BLOCK@INPUT_RESHAPE_VAR -> concat -> Heter_SERVER_BLOCK_index@JOINT_VAR
for var_name in exit_var_list:
var = program.global_block().vars[var_name]
shape = var.shape
if len(shape) < 2 or shape[0] != -1:
raise ValueError(
"Variable {} not support heter training. its shape is {}".
format(var_name, shape))
send_reshape_dim = -1 * reduce(lambda x, y: x * y, shape)
output_var_reshape_dim.append(send_reshape_dim)
output_var_reshape_name.append("{}.output_reshape@Heter".format(
var_name))
info = {
"input_var_reshape_dim": input_var_reshape_dim,
"input_var_reshape_name": input_var_reshape_name,
"block_input_var_name": block_input_var_name,
"output_var_reshape_dim": output_var_reshape_dim,
"output_var_reshape_name": output_var_reshape_name,
"block_output_var_name": block_output_var_name
}
return info
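The flattening rule used above, shown in isolation with hypothetical shapes, to make explicit how the reshape dims are derived from variables whose leading dimension is -1:

from functools import reduce

# A shape like (-1, d1, d2, ...) flattens to d1 * d2 * ... per sample.
for shape in [(-1, 32), (-1, 4, 8)]:
    flat_dim = -1 * reduce(lambda x, y: x * y, shape)
    print(shape, "->", flat_dim)   # (-1, 32) -> 32, (-1, 4, 8) -> 32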
def find_block_joints(program, program_block_ops_list, heter_ops):
block_var_detail = find_entrance_exit_private(program,
program_block_ops_list)
block_var_detail = entrance_exit_check(program, program_block_ops_list,
block_var_detail, heter_ops)
block_var_detail = delete_block_useless_exit(
program, program_block_ops_list, block_var_detail)
return block_var_detail
def find_entrance_exit_private(program, program_block_ops_list):
block_var_detail = []
persistables = []
for index, block_op_list in enumerate(program_block_ops_list):
block_input, block_output = find_ops_list_input_output(program,
block_op_list)
persistables = screen_persistables(
program, block_input) + screen_persistables(program, block_output)
# find entrance & exit
block_private_vars = list(set(block_input) & set(block_output))
block_entrance = list(set(block_input) - set(block_private_vars))
block_exit = list(set(block_output) - set(block_private_vars))
detail = {
"entrance": block_entrance,
"exit": block_exit,
"private": block_private_vars,
"persistables": persistables
}
block_var_detail.append(detail)
return block_var_detail
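The entrance/exit/private set algebra above, applied to a tiny hypothetical block that reads a and b and writes b and c:

block_input = ["a", "b"]                                  # variables read by the block
block_output = ["b", "c"]                                 # variables written by the block
private = list(set(block_input) & set(block_output))      # ["b"]: produced and consumed inside
entrance = list(set(block_input) - set(private))          # ["a"]: must come from the previous block
exit_vars = list(set(block_output) - set(private))        # ["c"]: handed to the next block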
def entrance_exit_check(program, program_block_ops_list, block_var_detail,
heter_ops):
for index in range(len(block_var_detail) - 1, -1, -1):
if index - 1 < 0:
break
previous_block_exit = block_var_detail[index - 1]["exit"]
previous_block_exit.sort()
current_block_entrance = block_var_detail[index]["entrance"]
current_block_entrance.sort()
if previous_block_exit == current_block_entrance:
continue
exist_vars = list(
set(previous_block_exit) & set(current_block_entrance))
need_add_vars = list(set(current_block_entrance) - set(exist_vars))
need_add_vars = find_need_var_from_previous_block(
need_add_vars, block_var_detail, index, heter_ops)
previous_block_private = block_var_detail[index - 1]["private"]
previous_block_entrance = block_var_detail[index - 1]["entrance"]
for var in need_add_vars:
if var not in previous_block_private and var not in previous_block_entrance:
previous_block_entrance.append(var)
previous_block_exit.append(var)
return block_var_detail
def find_need_var_from_previous_block(need_add_vars, block_var_detail,
current_index, heter_ops):
# create index_device_map
index_device_map = {}
for index in range(len(block_var_detail)):
index_device_map[index] = DEFAULT_DEVICE
for device in heter_ops:
for index in heter_ops[device].keys():
index_device_map[index] = device
pre_index = current_index - 1
need_ignore_var = []
# if need_add_var in current device, no need communicate
for var in need_add_vars:
while (pre_index >= 0):
previous_block_private = block_var_detail[pre_index]["private"]
previous_block_exit = block_var_detail[pre_index]["exit"]
previous_block_entrance = block_var_detail[pre_index]["entrance"]
total_var = previous_block_private + previous_block_exit + previous_block_entrance
if var in total_var:
if index_device_map[current_index] == index_device_map[
pre_index] and index_device_map[
current_index] == DEFAULT_DEVICE:
need_ignore_var.append(var)
break
pre_index -= 1
need_add_vars = list(set(need_add_vars).difference(set(need_ignore_var)))
return need_add_vars
def delete_block_useless_exit(program, program_block_ops_list,
block_var_detail):
for index in range(len(block_var_detail)):
if index == len(block_var_detail) - 1:
break
current_block_exit = block_var_detail[index]["exit"]
next_block_entrance = block_var_detail[index + 1]["entrance"]
need_delete_var = []
for var in current_block_exit:
if var not in next_block_entrance:
need_delete_var.append(var)
for var in need_delete_var:
current_block_exit.remove(var)
return block_var_detail
def check_op_device(block, device):
for op in block.ops:
op._set_attr('op_device', device)
def screen_persistables(program, var_list):
need_remove = []
for var_name in var_list:
if "@GRAD" in var_name:
origin_var_name = var_name.split("@GRAD")[0]
var = program.global_block().vars[origin_var_name]
else:
var = program.global_block().vars[var_name]
if fluid.io.is_persistable(var):
need_remove.append(var_name)
for var_name in need_remove:
var_list.remove(var_name)
return need_remove
def insert_reshape_op(program,
block,
index,
var_name,
new_var_name,
new_var_shape=None):
input_var = program.global_block().vars[var_name]
if new_var_name not in program.global_block().vars:
out = program.global_block().create_var(
name=new_var_name,
shape=new_var_shape,
dtype=input_var.dtype,
type=input_var.type)
else:
out = program.global_block().vars[new_var_name]
new_var_shape = out.shape
x_shape = program.global_block().create_var(
name="{}.xshape@Heter".format(var_name), dtype=input_var.dtype)
block._insert_op(
index=index,
type="reshape2",
inputs={"X": input_var},
attrs={'shape': new_var_shape},
outputs={"Out": out,
"XShape": x_shape})
def insert_send_concat_op(program, block, index, var_name_list, new_var_name,
new_var_shape):
input_var_list = [
program.global_block().vars[var_name] for var_name in var_name_list
]
out = program.global_block().create_var(
name=new_var_name,
shape=new_var_shape,
dtype=input_var_list[0].dtype,
type=input_var_list[0].type)
block._insert_op(
index=index,
type='concat',
inputs={"X": input_var_list},
outputs={'Out': [out]},
attrs={'axis': -1,
'use_stack': False})
def insert_recv_slice_op(program, block, index, var_name, var_shape, dtype,
type, new_var_name_list, new_var_shape_list):
if var_name not in program.global_block().vars:
input_var = program.global_block().create_var(
name=var_name, shape=var_shape, dtype=dtype, type=type)
else:
input_var = program.global_block().vars[var_name]
out_list = []
for i in range(len(new_var_name_list)):
if new_var_name_list[i] not in program.global_block().vars:
out = program.global_block().create_var(
name=new_var_name_list[i],
shape=new_var_shape_list[i],
dtype=input_var.dtype,
type=input_var.type)
else:
out = program.global_block().vars[new_var_name_list[i]]
out_list.append(out)
start_index = 0
end_index = 0
for i in range(len(new_var_name_list)):
starts = []
ends = []
attrs = {'axes': [1]}
end_index += new_var_shape_list[i][1]
starts.append(start_index)
ends.append(end_index)
attrs['starts'] = starts
attrs['ends'] = ends
block._insert_op(
index=index,
type='slice',
inputs={'Input': input_var},
attrs=attrs,
outputs={'Out': out_list[i]})
start_index = end_index
index += 1
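Worked slice ranges produced by the loop above for two hypothetical pieces of width 32 and 16:

# new_var_shape_list = [(-1, 32), (-1, 16)]
# piece 0: axes=[1], starts=[0],  ends=[32]
# piece 1: axes=[1], starts=[32], ends=[48]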
def deleter_trainer_useless_var(program):
program_useful_var_list = []
for op in program.global_block().ops:
input_var_list, output_var_list = find_op_input_output(
program, program.global_block(), op)
op_var_list = list(set(input_var_list).union(set(output_var_list)))
program_useful_var_list = list(
set(program_useful_var_list).union(set(op_var_list)))
program_useless_var_list = list(
set(get_vars_name_in_block(program.global_block())).difference(
set(program_useful_var_list)))
for var in program_useless_var_list:
program.global_block()._remove_var(var)
return program_useless_var_list
def block_append_op(program, origin_program, block, op):
inputs = _get_input_map_from_op(origin_program.global_block().vars, op)
for key, varlist in six.iteritems(inputs):
if not isinstance(varlist, list):
varlist = [varlist]
for var in varlist:
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
outputs = _get_output_map_from_op(origin_program.global_block().vars, op)
for key, varlist in six.iteritems(outputs):
if not isinstance(varlist, list):
varlist = [varlist]
for var in varlist:
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
if "_grad" not in op.type:
# for forward op
return block.append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs())
else:
# for grad op
op_desc = op.desc
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
backward = core.op_proto_and_checker_maker.OpRole.Backward
device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
# append grad op
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(op_desc)
new_op_desc._set_attr(op_role_attr_name, backward)
# set device guard
if op.desc.has_attr(device_attr_name):
op_device = op_desc.attr(device_attr_name)
new_op_desc._set_attr(device_attr_name, op_device)
block._sync_with_cpp()
def add_vars_by_op_map(var_map, program):
for key, varlist in six.iteritems(var_map):
if not isinstance(varlist, list):
varlist = [varlist]
for i in range(len(varlist)):
var = varlist[i]
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
def add_vars_by_var_list(var_name_list, origin_program, program):
for var_name in var_name_list:
if var_name not in program.global_block().vars:
var = origin_program.global_block().vars[var_name]
program.global_block()._clone_variable(var)
def get_varlist_from_op_map(var_map):
var_list = []
for key, varlist in six.iteritems(var_map):
if not isinstance(varlist, list):
varlist = [varlist]
for i in range(len(varlist)):
var = varlist[i]
var_list.append(var.name)
return var_list
def find_ops_list_input_output(program, ops_list):
input_var_list = []
output_var_list = []
for op in ops_list:
inputs = _get_input_map_from_op(program.global_block().vars, op)
input_var_list += get_varlist_from_op_map(inputs)
outputs = _get_output_map_from_op(program.global_block().vars, op)
output_var_list += get_varlist_from_op_map(outputs)
input_var_list = list(set(input_var_list))
output_var_list = list(set(output_var_list))
return input_var_list, output_var_list
def find_op_input_output(program, block, op):
input_var_list = []
output_var_list = []
inputs = _get_input_map_from_op(block.vars, op)
input_var_list += get_varlist_from_op_map(inputs)
outputs = _get_output_map_from_op(block.vars, op)
output_var_list += get_varlist_from_op_map(outputs)
input_var_list = list(set(input_var_list))
output_var_list = list(set(output_var_list))
return input_var_list, output_var_list
def get_vars_name_in_block(block):
vars_list = block.vars.keys()
vars_name_list = [var_name for var_name in vars_list]
return vars_name_list
def is_same_op(op1, op2):
if str(op1) != str(op2):
return False
return True
def _get_input_map_from_op(varmap, op):
"""Returns a dict from op input name to the vars in varmap."""
iomap = collections.OrderedDict()
for key in op.input_names:
vars = []
for varname in op.input(key):
if varname == "@EMPTY@":
continue
if "lod_tensor_blocking_queue" in varname:
continue
vars.append(varmap[varname])
if len(vars) == 1:
iomap[key] = vars[0]
else:
iomap[key] = vars
return iomap
def _get_output_map_from_op(varmap, op):
"""Returns a dict from op output name to the vars in varmap."""
iomap = collections.OrderedDict()
for key in op.output_names:
vars = []
for varname in op.output(key):
if varname == "@EMPTY@":
continue
if "lod_tensor_blocking_queue" in varname:
continue
vars.append(varmap[varname])
if len(vars) == 1:
iomap[key] = vars[0]
else:
iomap[key] = vars
return iomap
def delete_same_ops(block, ops):
for op in ops:
try:
for origin_op in block.ops:
if is_same_op(origin_op, op):
idx = list(block.ops).index(origin_op)
block._remove_op(idx)
break
except Exception as e:
print(e)
......@@ -1858,6 +1858,7 @@ def conv3d(input,
return helper.append_activation(pre_act)
@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool2d")
@templatedoc()
def pool2d(input,
pool_size=-1,
......@@ -2075,6 +2076,7 @@ def pool2d(input,
return pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool3d")
@templatedoc()
def pool3d(input,
pool_size=-1,
......@@ -2303,6 +2305,7 @@ def pool3d(input,
return pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool2d")
@templatedoc(op_type="pool2d")
def adaptive_pool2d(input,
pool_size,
......@@ -2450,6 +2453,7 @@ def adaptive_pool2d(input,
return (pool_out, mask) if require_index else pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool3d")
@templatedoc(op_type="pool3d")
def adaptive_pool3d(input,
pool_size,
......@@ -10205,6 +10209,7 @@ def unstack(x, axis=0, num=None):
return outs
@deprecated(since='2.0.0', update_to="paddle.expand")
def expand(x, expand_times, name=None):
"""
:alias_main: paddle.expand
......@@ -10312,6 +10317,7 @@ def expand(x, expand_times, name=None):
return out
@deprecated(since='2.0.0', update_to="paddle.expand_as")
def expand_as(x, target_tensor, name=None):
"""
:alias_main: paddle.expand_as
......@@ -10377,6 +10383,9 @@ def expand_as(x, target_tensor, name=None):
#(3,20)
"""
if in_dygraph_mode():
return core.ops.expand_as(x, target_tensor)
check_variable_and_dtype(
x, 'x', ['float32', 'float64', 'int32', 'int64', 'bool'], 'expand_as')
check_variable_and_dtype(target_tensor, 'target_tensor',
......@@ -15004,6 +15013,7 @@ def gather_tree(ids, parents):
return out
@deprecated(since="2.0.0", update_to="paddle.uniform")
@templatedoc()
def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0,
name=None):
......
......@@ -17,8 +17,9 @@ from __future__ import print_function
import os
import logging
import tarfile
import tempfile
import random
import warnings
import paddle
import paddle.fluid.incubate.data_generator as data_generator
......@@ -57,7 +58,7 @@ def load_dnn_input_record(sent):
def load_lr_input_record(sent):
res = []
for _ in [x.split(':') for x in sent.split()]:
res.append(int(_[0]))
res.append(int(_[0]) % 10000)
return res
......@@ -120,9 +121,62 @@ def prepare_data():
lr_input_dim = res[1]
logger.info('dnn input dim: %d' % dnn_input_dim)
logger.info('lr input dim: %d' % lr_input_dim)
return dnn_input_dim, lr_input_dim, train_file_path
def gen_fake_line(dnn_data_num=7,
dnn_data_range=1e5,
lr_data_num=5,
lr_data_range=1e5):
line = ""
# for deep data
for index in range(dnn_data_num):
data = str(random.randint(0, dnn_data_range - 1))
if index < dnn_data_num - 1:
data += " "
line += data
line += "\t"
# for wide data
for index in range(lr_data_num):
data = str(random.randint(0, lr_data_range - 1)) + ":" + str(1)
if index < lr_data_num - 1:
data += " "
line += data
line += "\t"
# for label
line += str(random.randint(0, 1))
line += "\n"
return line
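A sample of the line layout produced above (the numbers themselves are random; this only shows the tab-separated structure):

# <7 dnn ids separated by spaces>\t<5 "id:1" pairs separated by spaces>\t<label>\n
# e.g. "523 77 901 4 18 6 33\t12:1 88:1 5:1 6:1 91:1\t0\n"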
def prepare_fake_data(file_nums=8, file_lines=1000):
"""
Create fake data with the same format as avazu_ctr_data
"""
file_dir = tempfile.mkdtemp()
warnings.warn("Fake data write in {}".format(file_dir))
for file_index in range(file_nums):
with open(
os.path.join(file_dir,
"ctr_train_data_part_{}".format(file_index)),
'w+') as fin:
file_str = ""
for line_index in range(file_lines):
file_str += gen_fake_line()
fin.write(file_str)
warnings.warn("Write done ctr_train_data_part_{}".format(
file_index))
file_list = [os.path.join(file_dir, x) for x in os.listdir(file_dir)]
assert len(file_list) == file_nums
return file_list
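Minimal usage sketch of the helper above, with small sizes for a quick smoke run:

file_list = prepare_fake_data(file_nums=2, file_lines=10)   # 2 files x 10 fake lines in a temp dir
assert len(file_list) == 2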
if __name__ == "__main__":
pairwise_reader = DatasetCtrReader()
pairwise_reader.run_from_stdin()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Distributed CTR model for testing the fleet api
"""
from __future__ import print_function
import shutil
import tempfile
import time
import paddle
import paddle.fluid as fluid
import os
import numpy as np
import ctr_dataset_reader
from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase
from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
from paddle.distributed.fleet.base.util_factory import fleet_util
# Fix seed for test
fluid.default_startup_program().random_seed = 1
fluid.default_main_program().random_seed = 1
class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
"""
Test the CTR model using the Fleet api
"""
def net(self, args, batch_size=4, lr=0.01):
"""
network definition
Args:
batch_size(int): the size of mini-batch for training
lr(float): learning rate of training
Returns:
avg_cost: LoDTensor of cost.
"""
dnn_input_dim, lr_input_dim = int(1e5), int(1e5)
dnn_data = fluid.layers.data(
name="dnn_data",
shape=[-1, 1],
dtype="int64",
lod_level=1,
append_batch_size=False)
lr_data = fluid.layers.data(
name="lr_data",
shape=[-1, 1],
dtype="int64",
lod_level=1,
append_batch_size=False)
label = fluid.layers.data(
name="click",
shape=[-1, 1],
dtype="float32",
lod_level=0,
append_batch_size=False)
datas = [dnn_data, lr_data, label]
if args.reader == "pyreader":
self.reader = fluid.io.PyReader(
feed_list=datas,
capacity=64,
iterable=False,
use_double_buffer=False)
# build dnn model
dnn_layer_dims = [128, 64, 32, 1]
dnn_embedding = fluid.layers.embedding(
is_distributed=False,
input=dnn_data,
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01)),
is_sparse=True)
dnn_pool = fluid.layers.sequence_pool(
input=dnn_embedding, pool_type="sum")
dnn_out = dnn_pool
# build lr model
lr_embedding = fluid.layers.embedding(
is_distributed=False,
input=lr_data,
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01)),
is_sparse=True)
lr_pool = fluid.layers.sequence_pool(input=lr_embedding, pool_type="sum")
with fluid.device_guard("gpu"):
for i, dim in enumerate(dnn_layer_dims[1:]):
fc = fluid.layers.fc(
input=dnn_out,
size=dim,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)),
name='dnn-fc-%d' % i)
dnn_out = fc
merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
label = fluid.layers.cast(label, dtype="int64")
predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
fluid.layers.Print(avg_cost, message="avg_cost")
self.feeds = datas
self.train_file_path = ["fake1", "fake2"]
self.avg_cost = avg_cost
self.predict = predict
return avg_cost
def check_model_right(self, dirname):
model_filename = os.path.join(dirname, "__model__")
with open(model_filename, "rb") as f:
program_desc_str = f.read()
program = fluid.Program.parse_from_string(program_desc_str)
with open(os.path.join(dirname, "__model__.proto"), "w") as wn:
wn.write(str(program))
def do_pyreader_training(self, fleet):
"""
do training using py_reader, feeding mini-batches from a python reader
Args:
fleet(Fleet api): the fleet object of Parameter Server, define distribute training role
"""
exe = fluid.Executor(fluid.CPUPlace())
fleet.init_worker()
exe.run(fluid.default_startup_program())
batch_size = 4
train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size)
self.reader.decorate_sample_list_generator(train_reader)
for epoch_id in range(1):
self.reader.start()
try:
pass_start = time.time()
while True:
exe.run(program=fluid.default_main_program())
pass_time = time.time() - pass_start
except fluid.core.EOFException:
self.reader.reset()
fleet.stop_worker()
def do_dataset_training(self, fleet):
train_file_list = ctr_dataset_reader.prepare_fake_data()
exe = fluid.Executor(fluid.CPUPlace())
fleet.init_worker()
exe.run(fluid.default_startup_program())
thread_num = 1
batch_size = 128
filelist = fleet_util.get_file_shard(train_file_list)
print("filelist: {}".format(filelist))
# config dataset
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
dataset.set_batch_size(batch_size)
dataset.set_use_var(self.feeds)
pipe_command = 'python ctr_dataset_reader.py'
dataset.set_pipe_command(pipe_command)
dataset.set_filelist(filelist)
dataset.set_thread(thread_num)
for epoch_id in range(1):
pass_start = time.time()
dataset.set_filelist(filelist)
exe.train_from_dataset(
program=fluid.default_main_program(),
dataset=dataset,
fetch_list=[self.avg_cost],
fetch_info=["cost"],
print_period=2,
debug=int(os.getenv("Debug", "0")))
pass_time = time.time() - pass_start
print("do_dataset_training done. using time {}".format(pass_time))
if os.getenv("SAVE_MODEL") == "1":
model_dir = tempfile.mkdtemp()
fleet.save_inference_model(exe, model_dir,
[feed.name for feed in self.feeds],
self.avg_cost)
self.check_model_right(model_dir)
shutil.rmtree(model_dir)
fleet.stop_worker()
print("do_dataset_training stop worker.")
if __name__ == "__main__":
runtime_main(TestHeterPsCTR2x2)
......@@ -17,6 +17,8 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
class TestAdadeltaOp1(OpTest):
......@@ -108,5 +110,54 @@ class TestAdadeltaOp2(OpTest):
self.check_output()
class TestAdadeltaV2(unittest.TestCase):
def test_adadelta_dygraph(self):
paddle.disable_static(paddle.CPUPlace())
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
# This can be any optimizer supported by dygraph.
        adadelta = paddle.optimizer.Adadelta(
            learning_rate=0.01,
            parameters=linear.parameters(),
            weight_decay=0.01)
        out = linear(a)
        out.backward()
        adadelta.step()
        adadelta.clear_gradients()
def test_adadelta(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
            adadelta_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
            adadelta_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(ValueError, paddle.optimizer.Adadelta, None)
self.assertRaises(
ValueError, paddle.optimizer.Adadelta, learning_rate=0.1, rho=None)
self.assertRaises(
ValueError,
paddle.optimizer.Adadelta,
learning_rate=0.1,
epsilon=None)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
import paddle
import paddle.nn.functional as F
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
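# Adaptive pooling splits the length L into `output_size` nearly equal windows:
# window i covers [floor(i * L / out), ceil((i + 1) * L / out)).
# E.g. for L = 7 and out = 3 the windows are [0, 3), [2, 5) and [4, 7).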
def avg_pool1D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=False,
adaptive=False,
data_type=np.float64):
N, C, L = x.shape
if global_pool == 1:
ksize = [L]
if adaptive:
L_out = ksize[0]
else:
L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
L - ksize[0] + 2 * paddings[0]) // strides[0] + 1
out = np.zeros((N, C, L_out))
for i in range(L_out):
if adaptive:
r_start = adaptive_start_index(i, L, ksize[0])
r_end = adaptive_end_index(i, L, ksize[0])
else:
r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L))
x_masked = x[:, :, r_start:r_end]
field_size = (r_end - r_start) \
if (exclusive or adaptive) else (ksize[0])
        if data_type == np.int8 or data_type == np.uint8:
            out[:, :, i] = (np.rint(
                np.sum(x_masked, axis=2) / field_size)).astype(data_type)
else:
out[:, :, i] = (np.sum(x_masked, axis=(2)) /
field_size).astype(data_type)
return out
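# Note: with adaptive=True, `strides` and `paddings` are ignored and ksize[0]
# is taken as the output length, which is why the checks below can pass
# strides=[0] or strides=[2] interchangeably.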
class TestPool1d_API(unittest.TestCase):
def setUp(self):
np.random.seed(123)
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_adaptive_avg_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_avg_pool1d(input, output_size=16)
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
            ada_avg_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d(
                output_size=16)
            result = ada_avg_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_avg_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_avg_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def test_adaptive_avg_pool1d(self):
for place in self.places:
self.check_adaptive_avg_dygraph_results(place)
self.check_adaptive_avg_static_results(place)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import unittest
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid import compiler, Program, program_guard
import paddle
import paddle.nn.functional as F
import paddle.fluid as fluid
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def max_pool1D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=False,
adaptive=False,
data_type=np.float64):
N, C, L = x.shape
if global_pool == 1:
ksize = [L]
if adaptive:
L_out = ksize[0]
else:
L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
L - ksize[0] + 2 * paddings[0]) // strides[0] + 1
out = np.zeros((N, C, L_out))
for i in range(L_out):
if adaptive:
r_start = adaptive_start_index(i, L, ksize[0])
r_end = adaptive_end_index(i, L, ksize[0])
else:
r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L))
x_masked = x[:, :, r_start:r_end]
out[:, :, i] = np.max(x_masked, axis=(2))
return out
class TestPool1d_API(unittest.TestCase):
def setUp(self):
np.random.seed(123)
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_adaptive_max_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_max_pool1d(input, output_size=16)
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_max_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def test_adaptive_max_pool1d(self):
for place in self.places:
self.check_adaptive_max_dygraph_results(place)
self.check_adaptive_max_static_results(place)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import division
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def adaptive_pool2d_forward(x, output_size, data_format='NCHW',
pool_type="max"):
N = x.shape[0]
C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \
else [x.shape[3], x.shape[1], x.shape[2]]
    if isinstance(output_size, int) or output_size is None:
        H_out = output_size
        W_out = output_size
        output_size = [H_out, W_out]
    else:
        H_out, W_out = output_size
        if output_size[0] is None:
            output_size[0] = H
            H_out = H
        if output_size[1] is None:
            output_size[1] = W
            W_out = W
out = np.zeros((N, C, H_out, W_out)) if data_format=='NCHW' \
else np.zeros((N, H_out, W_out, C))
for i in range(H_out):
in_h_start = adaptive_start_index(i, H, output_size[0])
in_h_end = adaptive_end_index(i, H, output_size[0])
for j in range(W_out):
in_w_start = adaptive_start_index(j, W, output_size[1])
in_w_end = adaptive_end_index(j, W, output_size[1])
if data_format == 'NCHW':
x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end]
if pool_type == 'avg':
field_size = (
(in_h_end - in_h_start) * (in_w_end - in_w_start))
out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size
elif pool_type == 'max':
out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
elif data_format == 'NHWC':
x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :]
if pool_type == 'avg':
field_size = (
(in_h_end - in_h_start) * (in_w_end - in_w_start))
out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size
elif pool_type == 'max':
out[:, i, j, :] = np.max(x_masked, axis=(1, 2))
return out
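# Worked example for the 7x7 inputs used below with output_size=[3, 3]: the
# row and column windows are [0, 3), [2, 5) and [4, 7), so e.g. out[:, :, 0, 0]
# reduces x[:, :, 0:3, 0:3] (max or mean depending on pool_type).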
class TestAdaptiveMaxPool2dAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[3, 3], pool_type="max")
self.res_2_np = adaptive_pool2d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[2, 5], pool_type="max")
"""
self.res_4_np = adaptive_pool2d_forward(
x=self.x_np,
output_size=[3, 3],
pool_type="max",
data_format="NHWC")
"""
self.res_5_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[None, 3], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32")
out_1 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[2, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool2d(
# x=x, output_size=[3, 3], data_format="NHWC")
out_5 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[None, 3])
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool2d(
x=x, return_indices=False, output_size=[3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[2, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool2d(
# x=x, output_size=[3, 3], data_format="NHWC")
out_5 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[None, 3])
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[3, 3], pool_type="max")
self.res_2_np = adaptive_pool2d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[2, 5], pool_type="max")
#self.res_4_np = adaptive_pool2d_forward(
# x=self.x_np,
# output_size=[3, 3],
# pool_type="max",
# data_format="NHWC")
self.res_5_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[None, 3], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32")
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
# output_size=[3, 3], data_format="NHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
output_size=[None, 3])
out_5 = adaptive_max_pool(x=x)
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5])
out_3 = adaptive_max_pool(x=x)
#adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
# output_size=[3, 3], data_format="NHWC")
#out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
output_size=[None, 3])
out_5 = adaptive_max_pool(x=x)
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import division
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def adaptive_pool3d_forward(x,
output_size,
adaptive=True,
data_format='NCDHW',
pool_type='max'):
N = x.shape[0]
C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \
if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2],x.shape[3]]
    if isinstance(output_size, int) or output_size is None:
        H_out = output_size
        W_out = output_size
        D_out = output_size
        output_size = [D_out, H_out, W_out]
    else:
        D_out, H_out, W_out = output_size
        if output_size[0] is None:
            output_size[0] = D
            D_out = D
        if output_size[1] is None:
            output_size[1] = H
            H_out = H
        if output_size[2] is None:
            output_size[2] = W
            W_out = W
out = np.zeros((N, C, D_out, H_out, W_out)) if data_format=='NCDHW' \
else np.zeros((N, D_out, H_out, W_out, C))
for k in range(D_out):
d_start = adaptive_start_index(k, D, output_size[0])
d_end = adaptive_end_index(k, D, output_size[0])
for i in range(H_out):
h_start = adaptive_start_index(i, H, output_size[1])
h_end = adaptive_end_index(i, H, output_size[1])
for j in range(W_out):
w_start = adaptive_start_index(j, W, output_size[2])
w_end = adaptive_end_index(j, W, output_size[2])
if data_format == 'NCDHW':
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:
w_end]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (
w_end - w_start)
out[:, :, k, i, j] = np.sum(x_masked,
axis=(2, 3, 4)) / field_size
elif pool_type == 'max':
out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
elif data_format == 'NDHWC':
x_masked = x[:, d_start:d_end, h_start:h_end, w_start:
w_end, :]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (
w_end - w_start)
out[:, k, i, j, :] = np.sum(x_masked,
axis=(1, 2, 3)) / field_size
elif pool_type == 'max':
out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3))
return out
class TestAdaptiveMaxPool3dAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[3, 3, 3], pool_type="max")
self.res_2_np = adaptive_pool3d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[2, 3, 5], pool_type="max")
self.res_4_np = adaptive_pool3d_forward(
x=self.x_np,
output_size=[3, 3, 3],
pool_type="max",
data_format="NDHWC")
self.res_5_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[None, 3, None], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32")
out_1 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[3, 3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[2, 3, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool3d(
# x=x, output_size=[3, 3, 3], data_format="NDHWC")
out_5 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[None, 3, None])
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[3, 3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[2, 3, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool3d(
# x=x, output_size=[3, 3, 3], data_format="NDHWC")
out_5 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[None, 3, None])
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[3, 3, 3], pool_type="max")
self.res_2_np = adaptive_pool3d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[2, 3, 5], pool_type="max")
# self.res_4_np = adaptive_pool3d_forward(
# x=self.x_np,
# output_size=[3, 3, 3],
# pool_type="max",
# data_format="NDHWC")
self.res_5_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[None, 3, None], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32")
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[3, 3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[2, 3, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
# output_size=[3, 3, 3], data_format="NDHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[None, 3, None])
out_5 = adaptive_max_pool(x=x)
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
# assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[3, 3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[2, 3, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
# output_size=[3, 3, 3], data_format="NDHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[None, 3, None])
out_5 = adaptive_max_pool(x=x)
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
# assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
if __name__ == '__main__':
unittest.main()
......@@ -85,10 +85,35 @@ class TestBatchNorm(unittest.TestCase):
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
def compute_v3(x, is_test, trainable_statistics):
with fluid.dygraph.guard(p):
bn = fluid.dygraph.BatchNorm(
shape[1],
is_test=is_test,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0),
trainable=False),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0),
trainable=False),
trainable_statistics=trainable_statistics)
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
def compute_v4(x):
with fluid.dygraph.guard(p):
bn = paddle.nn.BatchNorm2d(
shape[1], weight_attr=False, bias_attr=False)
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
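        # compute_v3 freezes scale=1 and bias=0 (non-trainable), which should be
        # numerically equivalent to compute_v4's BatchNorm2d without affine
        # parameters, hence the y3/y4 comparison below.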
x = np.random.randn(*shape).astype("float32")
y1 = compute_v1(x, False, False)
y2 = compute_v2(x)
y3 = compute_v3(x, False, False)
y4 = compute_v4(x)
self.assertTrue(np.allclose(y1, y2))
self.assertTrue(np.allclose(y3, y4))
def test_static(self):
places = [fluid.CPUPlace()]
......
......@@ -166,12 +166,16 @@ class TestClipAPI(unittest.TestCase):
data_shape = [1, 9, 9, 4]
data = np.random.random(data_shape).astype('float32')
images = paddle.to_variable(data, dtype='float32')
v_min = paddle.to_variable(np.array([0.2], dtype=np.float32))
v_max = paddle.to_variable(np.array([0.8], dtype=np.float32))
out_1 = paddle.clip(images, min=0.2, max=0.8)
out_2 = paddle.clip(images, min=0.2, max=0.9)
out_3 = paddle.clip(images, min=v_min, max=v_max)
self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
def test_errors(self):
paddle.enable_static()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
"""
High-level unit test for distributed fleet.
"""
import os
import sys
import subprocess
import six
import shutil
import numpy as np
import argparse
from contextlib import closing
import socket
import time
import tempfile
import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
__all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main']
RUN_STEP = 5
LEARNING_RATE = 0.01
DIST_UT_PORT = 0
class FleetDistHeterRunnerBase(object):
"""
    run_pserver, run_trainer : after the role is initialized, split the program with the transpiler
    net : implemented by child classes, builds the network of the model
    do training : the executor runs the program
"""
def build_role(self, args):
environs = {}
environs["PADDLE_PSERVERS_IP_PORT_LIST"] = args.endpoints
environs["PADDLE_TRAINER_ENDPOINTS"] = args.trainer_endpoints
environs[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"] = args.heter_trainer_endpoints
environs["PADDLE_HETER_TRAINER_DEVICE"] = args.heter_trainer_device
environs["TRAINING_ROLE"] = args.role.upper()
environs["PADDLE_TRAINERS_NUM"] = args.trainers
environs["PADDLE_TRAINER_ID"] = args.current_id
if args.role.upper() == "PSERVER":
environs["POD_IP"] = args.endpoints.split(",")[int(
args.current_id)].split(":")[0]
environs["PADDLE_PORT"] = args.endpoints.split(",")[int(
args.current_id)].split(":")[1]
elif args.role.upper() == "HETER_TRAINER":
environs["POD_IP"] = args.heter_trainer_endpoints.split(",")[int(
args.current_id)].split(":")[0]
environs["PADDLE_PORT"] = args.heter_trainer_endpoints.split(",")[
int(args.current_id)].split(":")[1]
environs["FLAGS_selected_gpus"] = args.current_id
for k, v in environs.items():
os.environ[k] = str(v)
self.role = role_maker.PaddleCloudRoleMaker()
return self.role
def build_strategy(self, args):
self.strategy = paddle.distributed.fleet.DistributedStrategy()
self.strategy.a_sync = True
return self.strategy
def build_optimizer(self, avg_cost, strategy):
optimizer = fluid.optimizer.SGD(LEARNING_RATE)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
def run_pserver(self, args):
fleet.init_server()
fleet.run_server()
def run_dataset_trainer(self, args):
out = self.do_dataset_training(fleet)
def run_pyreader_trainer(self, args):
out = self.do_pyreader_training(fleet)
def net(self, args, batch_size=4, lr=0.01):
raise NotImplementedError(
"get_model should be implemented by child classes.")
def do_dataset_training(self, fleet):
raise NotImplementedError(
"do_dataset_training should be implemented by child classes.")
def do_pyreader_training(self, fleet):
raise NotImplementedError(
"do_pyreader_training should be implemented by child classes.")
class TestFleetHeterBase(unittest.TestCase):
"""
    start_pserver, start_trainer : build the start commands used by the test
    run_cluster : use multiple processes to test the distributed program
"""
def _setup_config(self):
raise NotImplementedError("tests should have _setup_config implemented")
def tearDown(self):
t = time.time() - self.startTime
print('%s: %.3f' % (self.__class__.__name__, t))
def setUp(self):
self.startTime = time.time()
self._mode = "async"
self._reader = "pyreader"
self._trainers = 2
self._pservers = 2
self._port_set = set()
self._heter_device = "gpu"
global DIST_UT_PORT
if DIST_UT_PORT == 0 and os.getenv("PADDLE_DIST_UT_PORT"):
DIST_UT_PORT = int(os.getenv("PADDLE_DIST_UT_PORT"))
if DIST_UT_PORT:
print("set begin_port:", DIST_UT_PORT)
self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT, DIST_UT_PORT + 1)
self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT + 2, DIST_UT_PORT + 3)
self._heter_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT + 4, DIST_UT_PORT + 5)
DIST_UT_PORT += 6
else:
self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._heter_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._python_interp = sys.executable
self._geo_sgd_need_push_nums = 5
self._grad_clip_mode = 0
self._setup_config()
def _find_free_port(self):
def __free_port():
with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as s:
s.bind(('', 0))
return s.getsockname()[1]
while True:
port = __free_port()
if port not in self._port_set:
self._port_set.add(port)
return port
def _start_pserver(self, cmd, required_envs):
ps0_cmd, ps1_cmd = cmd.format(0), cmd.format(1)
ps0_pipe = open(tempfile.gettempdir() + "/ps0_err.log", "wb+")
ps1_pipe = open(tempfile.gettempdir() + "/ps1_err.log", "wb+")
ps0_proc = subprocess.Popen(
ps0_cmd.strip().split(" "),
stdout=subprocess.PIPE,
stderr=ps0_pipe,
env=required_envs)
ps1_proc = subprocess.Popen(
ps1_cmd.strip().split(" "),
stdout=subprocess.PIPE,
stderr=ps1_pipe,
env=required_envs)
return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe
def _start_trainer(self, cmd, required_envs):
tr0_cmd, tr1_cmd = cmd.format(0), cmd.format(1)
tr0_pipe = open(tempfile.gettempdir() + "/tr0_err.log", "wb+")
tr1_pipe = open(tempfile.gettempdir() + "/tr1_err.log", "wb+")
tr0_out = open(tempfile.gettempdir() + "/tr0_out.log", "wb+")
tr1_out = open(tempfile.gettempdir() + "/tr1_out.log", "wb+")
tr0_proc = subprocess.Popen(
tr0_cmd.strip().split(" "),
stdout=tr0_out,
stderr=tr0_pipe,
env=required_envs)
tr1_proc = subprocess.Popen(
tr1_cmd.strip().split(" "),
stdout=tr1_out,
stderr=tr1_pipe,
env=required_envs)
return tr0_proc, tr1_proc, tr0_pipe, tr1_pipe
def _start_heter_trainer(self, cmd, required_envs):
heter0_cmd, heter1_cmd = cmd.format(0), cmd.format(1)
heter0_pipe = open(tempfile.gettempdir() + "/heter0_err.log", "wb+")
heter1_pipe = open(tempfile.gettempdir() + "/heter1_err.log", "wb+")
heter0_out = open(tempfile.gettempdir() + "/heter0_out.log", "wb+")
heter1_out = open(tempfile.gettempdir() + "/heter1_out.log", "wb+")
heter0_proc = subprocess.Popen(
heter0_cmd.strip().split(" "),
stdout=heter0_out,
stderr=heter0_pipe,
env=required_envs)
heter1_proc = subprocess.Popen(
heter1_cmd.strip().split(" "),
stdout=heter1_out,
stderr=heter1_pipe,
env=required_envs)
return heter0_proc, heter1_proc, heter0_pipe, heter1_pipe
def _run_cluster(self, model, envs):
env = {'GRAD_CLIP': str(self._grad_clip_mode)}
python_path = self._python_interp
gloo_path = tempfile.mkdtemp()
if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
envs['COVERAGE_FILE'] = os.getenv('COVERAGE_FILE', '')
python_path += " -m coverage run --branch -p"
env.update(envs)
tr_cmd = "{0} {1} --role trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
ps_cmd = "{0} {1} --role pserver --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
heter_cmd = "{0} {1} --role heter_trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
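        # The escaped {{}} in the templates survives the outer .format() as a
        # plain {} placeholder for the worker id; _start_pserver/_start_trainer/
        # _start_heter_trainer later fill it via cmd.format(0) / cmd.format(1).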
# Run dist train to compare with local results
ps0, ps1, ps0_pipe, ps1_pipe = self._start_pserver(ps_cmd, env)
tr0, tr1, tr0_pipe, tr1_pipe = self._start_trainer(tr_cmd, env)
heter0, heter1, heter0_pipe, heter1_pipe = self._start_heter_trainer(
heter_cmd, env)
        # Wait until the trainer processes terminate
while True:
stat0 = tr0.poll()
time.sleep(0.1)
if stat0 is not None:
break
while True:
stat1 = tr1.poll()
time.sleep(0.1)
if stat1 is not None:
break
tr0_out, tr0_err = tr0.communicate()
tr1_out, tr1_err = tr1.communicate()
print("tr end communicate")
tr0_ret = tr0.returncode
        tr1_ret = tr1.returncode
print("tr get returncode: {}".format(tr0_ret))
if tr0_ret != 0:
print(
"========================Error tr0_err begin==========================="
)
os.system("cat {}".format(tempfile.gettempdir() + "/tr0_err.log"))
print(
"========================Error tr0_err end==========================="
)
if tr1_ret != 0:
print(
"========================Error tr1_err begin==========================="
)
os.system("cat {}".format(tempfile.gettempdir() + "/tr1_err.log"))
print(
"========================Error tr1_err end==========================="
)
self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check")
self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check")
# close trainer file
tr0_pipe.close()
tr1_pipe.close()
ps0_pipe.close()
ps1_pipe.close()
heter0_pipe.close()
heter1_pipe.close()
ps0.terminate()
ps1.terminate()
heter0.terminate()
heter1.terminate()
shutil.rmtree(gloo_path)
return 0, 0
def check_with_place(self,
model_file,
delta=1e-3,
check_error_log=False,
need_envs={}):
required_envs = {
"PATH": os.getenv("PATH", ""),
"PYTHONPATH": os.getenv("PYTHONPATH", ""),
"LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
"FLAGS_rpc_deadline": "5000", # 5sec to fail fast
"http_proxy": ""
}
required_envs.update(need_envs)
if check_error_log:
required_envs["GLOG_v"] = "3"
required_envs["GLOG_logtostderr"] = "1"
tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)
def runtime_main(test_class):
parser = argparse.ArgumentParser(description='Run Fleet test.')
parser.add_argument(
'--role',
type=str,
required=True,
choices=['pserver', 'trainer', 'heter_trainer'])
parser.add_argument('--endpoints', type=str, required=False, default="")
parser.add_argument(
'--trainer_endpoints', type=str, required=False, default="")
parser.add_argument(
'--heter_trainer_endpoints', type=str, required=False, default="")
parser.add_argument(
'--heter_trainer_device', type=str, required=False, default="gpu")
parser.add_argument('--gloo_path', type=str, required=False, default="")
parser.add_argument('--current_id', type=int, required=False, default=0)
parser.add_argument('--trainers', type=int, required=False, default=1)
parser.add_argument('--mode', type=str, required=False, default='async')
parser.add_argument(
'--geo_sgd_need_push_nums', type=int, required=False, default=2)
parser.add_argument('--reader', type=str, required=False, default='dataset')
args = parser.parse_args()
model = test_class()
role = model.build_role(args)
fleet.init(role)
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
fleet_util._set_strategy(strategy)
fleet_util._set_role_maker(role)
if args.role == "pserver" or args.role == "heter_trainer":
model.run_pserver(args)
else:
if args.reader == "dataset":
model.run_dataset_trainer(args)
else:
model.run_pyreader_trainer(args)
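# Illustrative launch command (values are made up, mirroring the templates
# built in TestFleetHeterBase._run_cluster):
#   python dist_fleet_heter_ctr.py --role trainer \
#       --endpoints 127.0.0.1:36012,127.0.0.1:36013 \
#       --trainer_endpoints 127.0.0.1:36014,127.0.0.1:36015 \
#       --heter_trainer_endpoints 127.0.0.1:36016,127.0.0.1:36017 \
#       --current_id 0 --trainers 2 --mode async --reader dataset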
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import unittest
import tempfile
from test_dist_fleet_heter_base import TestFleetHeterBase
class TestDistHeterDatasetAsync2x2(TestFleetHeterBase):
def _setup_config(self):
self._mode = "async"
self._reader = "dataset"
def check_with_place(self,
model_file,
delta=1e-3,
check_error_log=False,
need_envs={}):
required_envs = {
"PATH": os.getenv("PATH", ""),
"PYTHONPATH": os.getenv("PYTHONPATH", ""),
"LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
"FLAGS_rpc_deadline": "5000", # 5sec to fail fast
"http_proxy": "",
"CPU_NUM": "1"
}
required_envs.update(need_envs)
if check_error_log:
required_envs["GLOG_v"] = "4"
required_envs["GLOG_logtostderr"] = "1"
tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)
def test_dist_train(self):
self.check_with_place(
"dist_fleet_heter_ctr.py", delta=1e-5, check_error_log=True)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle
import os
import math
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
class TestDistFleetHeterProgram(unittest.TestCase):
def build_role(self):
environs = {}
environs[
"PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36012,127.0.0.1:36013"
environs["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36014,127.0.0.1:36015"
environs[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"] = "127.0.0.1:36016,127.0.0.1:36017"
environs["PADDLE_HETER_TRAINER_DEVICE"] = "gpu"
environs["TRAINING_ROLE"] = "HETER_TRAINER"
environs["PADDLE_TRAINERS_NUM"] = 2
environs["PADDLE_TRAINER_ID"] = 0
environs["POD_IP"] = "127.0.0.1"
environs["PADDLE_PORT"] = "36016"
environs["FLAGS_selected_gpus"] = 0
for k, v in environs.items():
os.environ[k] = str(v)
self.role = role_maker.PaddleCloudRoleMaker()
return self.role
def build_strategy(self):
self.strategy = paddle.distributed.fleet.DistributedStrategy()
self.strategy.a_sync = True
return self.strategy
def build_input(self):
dense_input = fluid.layers.data(
name="dense_input", shape=[10], dtype="float32")
sparse_input_ids = [
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)
]
label = fluid.layers.data(name="label", shape=[1], dtype="float32")
inputs = [dense_input] + sparse_input_ids + [label]
return inputs
def build_net(self, inputs):
def embedding_layer(input):
return fluid.layers.embedding(
input=input,
is_sparse=True,
size=[100001, 10],
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()), )
sparse_embed_seq = list(map(embedding_layer, inputs[1:-1]))
concated = fluid.layers.concat(sparse_embed_seq + inputs[0:1], axis=1)
with fluid.device_guard("gpu"):
fc1 = fluid.layers.fc(
input=concated,
size=400,
act="relu",
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(concated.shape[1]))),
name="fc1")
with fluid.device_guard("cpu"):
fc2 = fluid.layers.fc(input=fc1,
size=400,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))),
name="fc2")
with fluid.device_guard("gpu"):
fc3 = fluid.layers.fc(input=fc2,
size=400,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))),
name="fc3")
with fluid.device_guard("cpu"):
predict = fluid.layers.fc(
input=fc3,
size=2,
act="softmax",
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))), )
with fluid.device_guard("gpu"):
labels = fluid.layers.cast(inputs[-1], dtype="int64")
cost = fluid.layers.cross_entropy(input=predict, label=labels)
avg_cost = fluid.layers.reduce_sum(cost)
return avg_cost
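    # The alternating fluid.device_guard blocks above make this a heterogeneous
    # program: fc1, fc3 and the cast/cross_entropy/reduce_sum section are pinned
    # to "gpu", while fc2 and the softmax fc stay on "cpu".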
def build_optimizer(self, avg_cost, strategy):
optimizer = fluid.optimizer.SGD(1e-2)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
def test(self):
role = self.build_role()
fleet.init(role)
strategy = self.build_strategy()
inputs = self.build_input()
avg_cost = self.build_net(inputs)
self.build_optimizer(avg_cost, strategy)
if __name__ == "__main__":
unittest.main()
......@@ -102,8 +102,23 @@ class TestExpandAsOpRank4(OpTest):
self.check_grad(['X'], 'Out')
# Test dygraph API
class TestExpandAsDygraphAPI(unittest.TestCase):
def test_api(self):
import paddle
paddle.disable_static()
np_data_x = np.array([1, 2, 3]).astype('int32')
np_data_y = np.array([1, 2, 3, 1, 2, 3]).astype('int32')
data_x = paddle.to_tensor(np_data_x)
data_y = paddle.to_tensor(np_data_y)
out = fluid.layers.expand_as(data_x, data_y)
np_out = out.numpy()
assert np.array_equal(np_out, np.tile(np_data_x, (2)))
paddle.enable_static()
# Test python API
class TestExpandAPI(unittest.TestCase):
class TestExpandAsAPI(unittest.TestCase):
def test_api(self):
input1 = np.random.random([12, 14]).astype("float32")
input2 = np.random.random([48, 14]).astype("float32")
......
......@@ -43,7 +43,7 @@ class TestFleetBase(unittest.TestCase):
role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role)
strategy = fleet.DistributedStrategy()
optimizer = paddle.optimizer.SGD(learning_rate=0.001)
optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.001)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
......
......@@ -22,6 +22,7 @@ import paddle.fluid.core as core
from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor
from op_test import OpTest
import paddle
class TestGaussianRandomOp(OpTest):
......@@ -235,6 +236,56 @@ class TestGaussianRandomAPI(unittest.TestCase):
self.assertAlmostEqual(np.mean(res_6), 0.0, delta=0.1)
self.assertAlmostEqual(np.std(res_6), 1., delta=0.1)
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.gaussian_random([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.gaussian_random([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.gaussian_random([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
class TestStandardNormalDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.standard_normal([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.standard_normal([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.standard_normal([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__":
unittest.main()
......@@ -658,7 +658,7 @@ class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list)
parameters=parameter_list)
optimizer = PipelineOptimizer(optimizer)
return optimizer
......@@ -670,7 +670,7 @@ class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list)
parameters=parameter_list)
optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
return optimizer
......@@ -682,7 +682,7 @@ class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list)
parameters=parameter_list)
optimizer = RecomputeOptimizer(optimizer)
return optimizer
......
......@@ -299,7 +299,7 @@ class TestLayer(LayerTest):
my_syncbn = paddle.nn.SyncBatchNorm(3)
dy_ret = my_syncbn(base.to_variable(t))
dy_ret_value = dy_ret.numpy()
self.assertTrue(np.array_equal(static_ret, static_ret))
self.assertTrue(np.array_equal(static_ret, dy_ret_value))
def test_relu(self):
with self.static_graph():
......
......@@ -19,6 +19,8 @@ import numpy as np
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import paddle
import paddle.fluid as fluid
class TestMomentumOp1(OpTest):
......@@ -234,5 +236,48 @@ class TestSparseMomentumOp2(TestSparseMomentumOp):
self.use_nesterov = True
class TestMomentumV2(unittest.TestCase):
def test_momentum_dygraph(self):
paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
# This can be any optimizer supported by dygraph.
        momentum = paddle.optimizer.Momentum(
            learning_rate=0.01, momentum=0.9, parameters=linear.parameters())
        out = linear(a)
        out.backward()
        momentum.step()
        momentum.clear_gradients()
def test_momentum(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
            momentum_optimizer = paddle.optimizer.Momentum(
                learning_rate=0.1, momentum=0.9)
            momentum_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(
ValueError, paddle.optimizer.Momentum, learning_rate=None)
self.assertRaises(ValueError, paddle.optimizer.Momentum, momentum=None)
if __name__ == "__main__":
unittest.main()
......@@ -174,66 +174,6 @@ class TestPool1d_API(unittest.TestCase):
result = max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_max_pool1d(input, output_size=16)
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_avg_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_avg_pool1d(input, output_size=16)
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_max_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def check_adaptive_avg_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_avg_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def check_max_dygraph_padding_same(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
......@@ -265,10 +205,6 @@ class TestPool1d_API(unittest.TestCase):
self.check_avg_dygraph_results(place)
self.check_max_static_results(place)
self.check_avg_static_results(place)
self.check_adaptive_max_dygraph_results(place)
self.check_adaptive_avg_dygraph_results(place)
self.check_adaptive_max_static_results(place)
self.check_adaptive_avg_static_results(place)
self.check_max_dygraph_padding_same(place)
self.check_avg_dygraph_padding_same(place)
......
......@@ -21,6 +21,7 @@ import paddle.fluid.core as core
from paddle import rand
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
import paddle
class TestRandOpError(unittest.TestCase):
......@@ -115,5 +116,31 @@ class TestRandOpForDygraph(unittest.TestCase):
self.run_net(True)
class TestRandDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.rand([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.rand([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.rand([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__":
unittest.main()
......@@ -20,6 +20,7 @@ import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.op import Operator
from op_test import OpTest
import paddle
class TestSGDOp(OpTest):
......@@ -208,5 +209,46 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
result = exe.run(compiled_prog, fetch_list=[avg_cost])
class TestSGDV2(unittest.TestCase):
def test_sgd_dygraph(self):
paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
# This can be any optimizer supported by dygraph.
        sgd = paddle.optimizer.SGD(
            learning_rate=0.01,
            parameters=linear.parameters(),
            weight_decay=0.01)
        out = linear(a)
        out.backward()
        sgd.step()
        sgd.clear_gradients()
def test_sgd(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
            sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.1)
            sgd_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None)
if __name__ == "__main__":
unittest.main()
......@@ -221,5 +221,21 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase):
self.assertRaises(TypeError, my_sync_batch_norm, x2)
class TestConvertSyncBatchNorm(unittest.TestCase):
def test_convert(self):
if not core.is_compiled_with_cuda():
return
with program_guard(Program(), Program()):
model = paddle.nn.Sequential(
paddle.nn.Conv2d(3, 5, 3), paddle.nn.BatchNorm2d(5))
sync_model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
for idx, sublayer in enumerate(model.sublayers()):
if isinstance(sublayer, paddle.nn.BatchNorm2d):
self.assertEqual(
isinstance(sync_model[idx], paddle.nn.SyncBatchNorm),
True)
if __name__ == '__main__':
unittest.main()
......@@ -536,5 +536,31 @@ class TestUniformDygraphMode(unittest.TestCase):
self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0))
class TestUniformDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.uniform([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.uniform([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.uniform([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__":
unittest.main()
......@@ -891,10 +891,11 @@ class Model(object):
class Mnist(paddle.nn.Layer):
def __init__(self):
super(MyNet, self).__init__()
self._fc = Linear(784, 1, act='softmax')
super(Mnist, self).__init__()
self._fc = Linear(784, 10, act='softmax')
@paddle.jit.to_static # If save for inference in dygraph, need this
# If save for inference in dygraph, need this
@paddle.jit.to_static
def forward(self, x):
y = self._fc(x)
return y
......@@ -903,21 +904,18 @@ class Model(object):
device = hapi.set_device('cpu')
# if use static graph, do not set
paddle.disable_static(device) if dynamic else None
# inputs and labels are not required for dynamic graph.
input = hapi.Input([None, 784], 'float32', 'x')
label = hapi.Input([None, 1], 'int64', 'label')
model = hapi.Model(Mnist(), input, label)
optim = paddle.optimizer.SGD(learning_rate=1e-3,
parameter_list=model.parameters())
model.prepare(optim,
paddle.nn.CrossEntropyLoss(),
hapi.metrics.Accuracy())
model.prepare(optim, paddle.nn.CrossEntropyLoss())
mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
model.save('checkpoint/test') # save for training
model.save('inference_model', False) # save for inference
"""
if ParallelEnv().local_rank == 0:
......@@ -1534,47 +1532,6 @@ class Model(object):
Returns:
list: The fetch variables' name list
Examples:
.. code-block:: python
import numpy as np
import paddle
from paddle.static import InputSpec
import paddle.incubate.hapi as hapi
from paddle.nn import Linear
from paddle.incubate.hapi.datasets.mnist import MNIST as MnistDataset
class Mnist(Layer):
def __init__(self, classifier_act=None):
super(Mnist, self).__init__()
self.fc = Linear(input_dim=784, output_dim=10, act="softmax")
@paddle.jit.to_static # In static mode, you need to delete this.
def forward(self, inputs):
outputs = self.fc(inputs)
return outputs
dynamic = True # False
device = hapi.set_device('gpu')
# if use static graph, do not set
paddle.disable_static(device) if dynamic else None
# inputs and labels are not required for dynamic graph.
input = InputSpec([None, 784], 'float32', 'x')
label = InputSpec([None, 1], 'int64', 'label')
model = hapi.Model(Mnist(), input, label)
optim = paddle.optimizer.SGD(learning_rate=1e-3,
parameter_list=model.parameters())
model.prepare(optim,
paddle.nn.CrossEntropyLoss(),
hapi.metrics.Accuracy())
mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
model.save_inference_model('inference_model')
"""
def get_inout_spec(all_vars, return_name=False):
......@@ -1592,8 +1549,8 @@ class Model(object):
# the inputs of the model in running.
# 3. Make it Unnecessary to add `@paddle.jit.to_static` for users in dynamic mode.
if fluid.in_dygraph_mode():
with fluid.framework._dygraph_guard(None):
layer = self.network
fluid.disable_dygraph()
# 1. input check
prog_translator = ProgramTranslator()
......@@ -1631,7 +1588,8 @@ class Model(object):
if param_or_buffer.name in state_names_dict:
extra_info_dict['structured_name'] = state_names_dict[
param_or_buffer.name]
extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient
extra_info_dict[
'stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase):
extra_info_dict['trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict
......
......@@ -64,6 +64,11 @@ class TestTransforms(unittest.TestCase):
self.do_transform(trans)
def test_normalize(self):
normalize = transforms.Normalize(mean=0.5, std=0.5)
trans = transforms.Compose([transforms.Permute(mode='CHW'), normalize])
self.do_transform(trans)
def test_trans_resize(self):
trans = transforms.Compose([
transforms.Resize(300, [0, 1]),
......@@ -165,7 +170,7 @@ class TestTransforms(unittest.TestCase):
fake_img = np.random.rand(500, 400, 3).astype('float32')
fake_img_gray = trans_gray(fake_img)
np.testing.assert_equal(len(fake_img_gray.shape), 2)
np.testing.assert_equal(len(fake_img_gray.shape), 3)
np.testing.assert_equal(fake_img_gray.shape[0], 500)
np.testing.assert_equal(fake_img_gray.shape[1], 400)
......
......@@ -16,6 +16,7 @@ import sys
import collections
import random
import math
import functools
import cv2
import numbers
......@@ -31,6 +32,23 @@ else:
__all__ = ['flip', 'resize', 'pad', 'rotate', 'to_grayscale']
def keepdims(func):
"""Keep the dimension of input images unchanged"""
@functools.wraps(func)
def wrapper(image, *args, **kwargs):
if len(image.shape) != 3:
raise ValueError("Expect image have 3 dims, but got {} dims".format(
len(image.shape)))
ret = func(image, *args, **kwargs)
if len(ret.shape) == 2:
ret = ret[:, :, np.newaxis]
return ret
return wrapper
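# [Illustrative sketch, not part of the patch] How the keepdims decorator above
# behaves: a hypothetical transform that collapses an image to 2-D gets its
# channel axis restored, so downstream transforms can keep assuming HWC input.
import numpy as np  # already imported at the top of this module


@keepdims
def _fake_to_gray(image):
    # hypothetical helper that drops the channel axis
    return image.mean(axis=2)


_img = np.random.rand(4, 4, 3).astype('float32')
assert _fake_to_gray(_img).shape == (4, 4, 1)  # trailing axis re-added by keepdims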
@keepdims
def flip(image, code):
"""
    According to the code (the type of flip), flip the input image
......@@ -62,6 +80,7 @@ def flip(image, code):
return cv2.flip(image, flipCode=code)
@keepdims
def resize(img, size, interpolation=cv2.INTER_LINEAR):
"""
resize the input data to given size
......@@ -103,6 +122,7 @@ def resize(img, size, interpolation=cv2.INTER_LINEAR):
return cv2.resize(img, size[::-1], interpolation=interpolation)
@keepdims
def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'):
"""Pads the given CV Image on all sides with speficified padding mode and fill value.
......@@ -193,6 +213,7 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'):
return img
@keepdims
def rotate(img,
angle,
interpolation=cv2.INTER_LINEAR,
......@@ -266,6 +287,7 @@ def rotate(img,
return dst.astype(dtype)
@keepdims
def to_grayscale(img, num_output_channels=1):
"""Converts image to grayscale version of image.
......
......@@ -505,7 +505,7 @@ class Normalize(object):
mean = [mean, mean, mean]
if isinstance(std, numbers.Number):
mean = [std, std, std]
std = [std, std, std]
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
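# [Illustrative sketch, not part of the patch] Effect of the fix above: a scalar
# mean/std is now broadcast to all three channels instead of the scalar std
# silently overwriting mean. Roughly what Normalize does with these buffers:
import numpy as np

_mean = np.array([0.5, 0.5, 0.5], dtype=np.float32).reshape(3, 1, 1)
_std = np.array([0.5, 0.5, 0.5], dtype=np.float32).reshape(3, 1, 1)
_chw_img = np.random.rand(3, 8, 8).astype(np.float32)
_normalized = (_chw_img - _mean) / _std  # channel-wise (x - mean) / std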
......
......@@ -97,8 +97,20 @@ from .layer.common import Dropout #DEFINE_ALIAS
from .layer.common import Dropout2D #DEFINE_ALIAS
from .layer.common import Dropout3D #DEFINE_ALIAS
from .layer.common import AlphaDropout #DEFINE_ALIAS
from .layer.pooling import AvgPool1d #DEFINE_ALIAS
from .layer.pooling import AvgPool2d #DEFINE_ALIAS
from .layer.pooling import AvgPool3d #DEFINE_ALIAS
from .layer.pooling import MaxPool1d #DEFINE_ALIAS
from .layer.pooling import MaxPool2d #DEFINE_ALIAS
from .layer.pooling import MaxPool3d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool2d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool3d #DEFINE_ALIAS
from .layer.conv import Conv1d #DEFINE_ALIAS
from .layer.conv import Conv2d #DEFINE_ALIAS
from .layer.conv import Conv3d #DEFINE_ALIAS
......
......@@ -170,22 +170,28 @@ from .norm import layer_norm #DEFINE_ALIAS
from .norm import lrn #DEFINE_ALIAS
from .norm import normalize #DEFINE_ALIAS
# from .norm import spectral_norm #DEFINE_ALIAS
from .pooling import max_pool1d #DEFINE_ALIAS
from .pooling import avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_max_pool1d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS
from .pooling import pool2d #DEFINE_ALIAS
from .pooling import pool3d #DEFINE_ALIAS
from .pooling import avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_pool2d #DEFINE_ALIAS
from .pooling import adaptive_pool3d #DEFINE_ALIAS
from .rnn import rnn #DEFINE_ALIAS
from .rnn import birnn #DEFINE_ALIAS
from .pooling import avg_pool2d #DEFINE_ALIAS
from .pooling import max_pool2d #DEFINE_ALIAS
from .pooling import avg_pool3d #DEFINE_ALIAS
from .pooling import max_pool1d #DEFINE_ALIAS
from .pooling import max_pool2d #DEFINE_ALIAS
from .pooling import max_pool3d #DEFINE_ALIAS
from .pooling import adaptive_pool2d #DEFINE_ALIAS
from .pooling import adaptive_pool3d #DEFINE_ALIAS
from .pooling import adaptive_max_pool1d #DEFINE_ALIAS
from .pooling import adaptive_max_pool2d #DEFINE_ALIAS
from .pooling import adaptive_max_pool3d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS
from .rnn import rnn #DEFINE_ALIAS
from .rnn import birnn #DEFINE_ALIAS
# from .rnn import gru_unit #DEFINE_ALIAS
# from .rnn import lstm #DEFINE_ALIAS
# from .rnn import lstm_unit #DEFINE_ALIAS
......
......@@ -158,7 +158,7 @@ def conv1d(x,
bias (Tensor, optional): The bias with shape [M,]. Default: None.
stride (int or tuple, optional): The stride size. If stride is a tuple, it must
            contain one integer, (stride_size). Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
            2. an int, which means the feature map is zero padded by size of `padding` on both sides.
            3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero padded by size of `padding[0]` on both sides.
......@@ -185,7 +185,7 @@ def conv1d(x,
same with input.
Raises:
ValueError: If the channel dimmention of the input is less than or equal to zero.
ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `data_format` is not "NCL" or "NLC".
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
......@@ -238,7 +238,7 @@ def conv1d(x,
num_channels = x.shape[channel_dim]
num_filters = weight.shape[0]
if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) "
raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format(
x.shape, num_channels))
if num_channels % groups != 0:
......@@ -260,7 +260,7 @@ def conv1d(x,
padding = padding + [0]
else:
raise ValueError(
"The size of padding's dimmention should 1 or 2. But got padding={}".
"The size of padding's dimension should be 1 or 2. But got padding={}".
format(padding))
stride = utils.convert_to_list(stride, 1, 'stride') + [1]
......@@ -424,7 +424,7 @@ def conv2d(x,
Raises:
ValueError: If `data_format` is not "NCHW" or "NHWC".
ValueError: If the channel dimmention of the input is less than or equal to zero.
ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0.
......@@ -465,7 +465,7 @@ def conv2d(x,
num_channels = x.shape[channel_dim]
num_filters = weight.shape[0]
if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) "
raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format(
x.shape, num_channels))
if num_channels % groups != 0:
......@@ -710,7 +710,7 @@ def conv_transpose1d(x,
num_channels = x.shape[channel_dim]
if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) "
raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format(
x.shape, num_channels))
if num_channels % groups != 0:
......@@ -728,7 +728,7 @@ def conv_transpose1d(x,
padding = padding + [0]
else:
raise ValueError(
"The size of padding's dimmention should 1 or 2. But got padding={}".
"The size of padding's dimension should 1 or 2. But got padding={}".
format(padding))
stride = utils.convert_to_list(stride, 1, 'stride') + [1]
......@@ -807,10 +807,10 @@ def conv_transpose2d(x,
stride=1,
padding=0,
output_padding=0,
groups=1,
dilation=1,
data_format='NCHW',
groups=1,
output_size=None,
data_format='NCHW',
name=None):
"""
......@@ -883,28 +883,27 @@ def conv_transpose2d(x,
stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds
`dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a
string, either 'VALID' or 'SAME' supported, which is the padding algorithm.
If `padding` is a tuple or list, it could be in three forms:
`[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and
when `data_format` is `'NCHW'`,
`padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `'NHWC'`, `padding` can be in the form
padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups = 1.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
output_size(int|tuple|list, optional): The output image size. If output size is a
tuple, it must contain two integers, (image_height, image_width). None if use
filter_size, padding, and stride to calculate output_size.
......@@ -950,7 +949,7 @@ def conv_transpose2d(x,
paddle.disable_static()
x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(w)
y_var = F.conv2d_transpose(x_var, w_var)
y_var = F.conv_transpose2d(x_var, w_var)
y_np = y_var.numpy()
print(y_np.shape)
......@@ -966,7 +965,7 @@ def conv_transpose2d(x,
channel_dim = -1 if channel_last else 1
num_channels = x.shape[channel_dim]
if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) "
raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format(
x.shape, num_channels))
if num_channels % groups != 0:
......@@ -1147,7 +1146,7 @@ def conv3d(x,
Raises:
ValueError: If `data_format` is not "NCDHW" or "NDHWC".
ValueError: If the channel dimmention of the input is less than or equal to zero.
ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0.
......@@ -1160,19 +1159,17 @@ def conv3d(x,
Examples:
.. code-block:: python
from paddle import fluid
import paddle.nn.functional as F
import paddle.fluid.dygraph as dg
import numpy as np
import paddle
import paddle.nn.functional as F
x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32)
place = fluid.CPUPlace()
with dg.guard(place):
x_var = dg.to_variable(x)
w_var = dg.to_variable(w)
y_var = F.conv3d(x_var, w_var, act="relu")
paddle.disable_static()
x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(w)
y_var = F.conv3d(x_var, w_var)
y_np = y_var.numpy()
print(y_np.shape)
......@@ -1190,7 +1187,7 @@ def conv3d(x,
num_filters = weight.shape[0]
if num_channels < 0:
raise ValueError(
"The channel dimmention of the input({}) should be defined. "
"The channel dimension of the input({}) should be defined. "
"Received: {}.".format(x.shape, num_channels))
if num_channels % groups != 0:
raise ValueError(
......@@ -1260,8 +1257,8 @@ def conv_transpose3d(x,
output_padding=0,
groups=1,
dilation=1,
data_format='NCDHW',
output_size=None,
data_format='NCDHW',
name=None):
"""
The convolution3d transpose layer calculates the output based on the input,
......@@ -1338,37 +1335,37 @@ def conv_transpose3d(x,
If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively
adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string,
either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding`
is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or
padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
on both sides for each dimension. If `padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `'NCDHW'`, `padding` can be in the form
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `'NDHWC'`, `padding` can be in the form
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: groups=1
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
Default: dilation = 1.
output_size(int|list|tuple, optional): The output image size. If output size is a
tuple, it must contain three integers, (image_depth, image_height, image_width). This
parameter only works when filter_size is None. If output_size and filter_size are
specified at the same time, They should follow the formula above. Default: None.
Output_size and filter_size should not be None at the same time.
data_format (str, optional): Specify the data format of the input, and the data format of the output
            will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
            The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
            `[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
......@@ -1425,7 +1422,7 @@ def conv_transpose3d(x,
num_filters = weight.shape[1]
if num_channels < 0:
raise ValueError(
"The channel dimmention of the input({}) should be defined. "
"The channel dimension of the input({}) should be defined. "
"Received: {}.".format(x.shape, num_channels))
if num_channels % groups != 0:
raise ValueError(
......
......@@ -784,30 +784,30 @@ def kl_div(input, label, reduction='mean', name=None):
import numpy as np
import paddle.nn.functional as F
paddle.enable_imperative()
paddle.disable_static()
shape = (5, 20)
input = np.random.uniform(-10, 10, shape).astype('float32')
target = np.random.uniform(-10, 10, shape).astype('float32')
# 'batchmean' reduction, loss shape will be [N]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='batchmean')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='batchmean')
# shape=[5]
# 'mean' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='mean')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='mean')
# shape=[1]
# 'sum' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='sum')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='sum')
# shape=[1]
# 'none' reduction, loss shape is same with input shape
pred_loss = F.kl_div(paddle.to_variable(input),
paddle.to_variable(target), reduction='none')
pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_tensor(target), reduction='none')
# shape=[5, 20]
"""
......
......@@ -18,124 +18,146 @@ from ...fluid.layers import pool3d #DEFINE_ALIAS
from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS
from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS
from ...fluid import core
from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_
from ...fluid.layers import utils, LayerHelper
from ...fluid.data_feeder import check_type, check_variable_and_dtype, check_type, check_dtype, convert_dtype
from ...fluid.layers import unsqueeze, squeeze
from ...fluid.framework import in_dygraph_mode
from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze
from ...fluid.data_feeder import check_type, check_variable_and_dtype
__all__ = [
'pool2d',
'pool3d',
'adaptive_pool2d',
'adaptive_pool3d',
'avg_pool1d',
'avg_pool2d',
'avg_pool3d',
'max_pool1d',
'max_pool2d',
'max_pool3d',
'adaptive_avg_pool1d',
'adaptive_max_pool1d',
'adaptive_avg_pool2d',
'adaptive_avg_pool3d',
'adaptive_pool2d',
'adaptive_pool3d',
'max_pool2d',
'avg_pool2d',
'max_pool3d',
'avg_pool3d',
'adaptive_max_pool1d',
'adaptive_max_pool2d',
'adaptive_max_pool3d',
]
def check_input(x, dimension):
def _is_list_or_tuple(input):
return isinstance(input, (list, tuple))
def _check_input(x, dimension):
if len(x.shape) != dimension:
raise ValueError("Excepted Input X is 3-D tensor, but received {}-D {}".
format(len(x.shape), type(x)))
raise ValueError(
"Excepted Input X is {}-D tensor, but received {}-D {}".format(
dimension, len(x.shape), type(x)))
def check_instance(x, x_name, types=(int, float)):
def _check_instance(x, x_name, types=(int, float)):
if not isinstance(x, types):
raise ValueError("Excepted {} type for {} but received type: {}. ".
format(types, x_name, type(x)))
def update_padding1d(padding, pool_type='avg'):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding):
if padding.__len__() == 1 and not is_list_or_tuple(padding[0]):
return [0, padding[0]]
else:
raise ValueError(
"{}_pool1d() argument 'padding' should contain one int (got {})".
format(pool_type, padding.__len__()))
def _zero_padding_in_batch_and_channel(padding, channel_last):
if channel_last:
return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
else:
padding = [0, padding]
return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
return padding
def _exclude_padding_in_batch_and_channel(padding, channel_last):
padding_ = padding[1:-1] if channel_last else padding[2:]
padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
return padding_
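# [Illustrative sketch, not part of the patch] Exercising the two helpers defined
# above on an NCHW-style padding spec: the batch/channel entries must be zero and
# are stripped before the spatial padding is handed on.
_p = [[0, 0], [0, 0], [1, 1], [2, 2]]
assert _zero_padding_in_batch_and_channel(_p, channel_last=False)
assert _exclude_padding_in_batch_and_channel(_p, channel_last=False) == [1, 1, 2, 2]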
def update_padding2d(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 4:
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
def _channel_last(data_format, num_dims):
if num_dims == 1:
if data_format not in ['NCL', 'NLC']:
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:4]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"):
if not (padding[0] == [0, 0] and padding[3] == [0, 0]):
"Attr(data_format) should be 'NCL' or 'NLC'. Received "
"Attr(data_format): %s" % str(data_format))
else:
return True if data_format == "NLC" else False
if num_dims == 2:
if data_format not in ['NCHW', 'NHWC']:
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[1:3]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
if utils._is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s" % str(data_format))
else:
padding = utils.convert_to_list(padding, 2, 'padding')
return padding
return True if data_format == "NHWC" else False
if num_dims == 3:
if data_format not in ['NCDHW', 'NDHWC']:
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
else:
return True if data_format == "NDHWC" else False
def update_padding3d(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, (list, tuple)):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 5:
if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]):
def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False):
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[2:5]
padding = [ele for a_list in padding for ele in a_list]
elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"):
if not (padding[0] == [0, 0] and padding[4] == [0, 0]):
"Unknown padding: '{}'. It can only be 'SAME' or 'VALID'.".
format(padding))
if padding == "VALID":
if ceil_mode != False:
raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions "
"is not supported." % str(padding))
padding = padding[1:4]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 6, 'padding')
if utils._is_symmetric_padding(padding, 3):
padding = [padding[0], padding[2], padding[4]]
elif is_list_or_tuple(padding) and len(padding) == 6:
padding = utils.convert_to_list(padding, 6, 'padding')
if utils._is_symmetric_padding(padding, 3):
padding = [padding[0], padding[2], padding[4]]
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
padding_algorithm = "VALID"
padding = [0] * num_dims
else:
padding_algorithm = "SAME"
padding = [0] * num_dims
elif _is_list_or_tuple(padding):
# for padding like
# [(pad_before, pad_after), (pad_before, pad_after), ...]
# padding for batch_dim and channel_dim included
if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
if not _zero_padding_in_batch_and_channel(padding, channel_last):
raise ValueError(
"Non-zero padding({}) in the batch or channel dimensions "
"is not supported.".format(padding))
padding_algorithm = "EXPLICIT"
padding = _exclude_padding_in_batch_and_channel(padding,
channel_last)
if utils._is_symmetric_padding(padding, num_dims):
padding = padding[0::2]
# for padding like [pad_before, pad_after, pad_before, pad_after, ...]
elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, 2 * num_dims, 'padding')
if utils._is_symmetric_padding(padding, num_dims):
padding = padding[0::2]
# for padding like [pad_d1, pad_d2, ...]
elif len(padding) == num_dims and isinstance(padding[0], int):
padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, num_dims, 'padding')
else:
raise ValueError("Invalid padding: {}".format(padding))
# for integer padding
else:
padding = utils.convert_to_list(padding, 3, 'padding')
padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, num_dims, 'padding')
return padding, padding_algorithm
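# [Illustrative sketch, not part of the patch] _update_padding_nd (defined above)
# folds every accepted padding spelling into a flat list plus a padding_algorithm
# flag; for a 2-D pooling call the expected results look like:
assert _update_padding_nd("same", 2) == ([0, 0], "SAME")
assert _update_padding_nd(2, 2) == ([2, 2], "EXPLICIT")
assert _update_padding_nd([1, 1, 2, 2], 2) == ([1, 2], "EXPLICIT")  # symmetric pairs collapse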
def _expand_low_nd_padding(padding):
#1d to 2d fake input
if len(padding) == 2:
padding = [0] * 2 + padding
elif len(padding) == 1:
padding = [0] + padding
else:
raise ValueError(
"The size of padding's dimmention should be 1 or 2. But got padding={}".
format(padding))
return padding
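# [Illustrative sketch, not part of the patch] 1-D pooling is emulated with the
# 2-D kernel on a fake [N, C, 1, L] input, so the 1-D padding first gains a zero
# entry for the fake height dimension:
assert _expand_low_nd_padding([3]) == [0, 3]
assert _expand_low_nd_padding([1, 2]) == [0, 0, 1, 2]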
......@@ -147,72 +169,56 @@ def avg_pool1d(x,
ceil_mode=False,
name=None):
"""
This operation applies a 1D average pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
.. math::
Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k])
    This API implements average pooling 1d operation.
See more details in :ref:`api_nn_pooling_AvgPool1d` .
Args:
x (Tensor): The input tensor of pooling operator which is a 3-D tensor with
shape [N, C, L]. where `N` is batch size, `C` is the number of channels,
`L` is the length of the feature. The data type if float32 or float64.
`L` is the length of the feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers.
it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero,
then the input is implicitly zero-padded on both sides for padding number of points.
it must contain an integer.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on both sides.
            3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on both sides.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
mode, default is `True`.
ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. Default False
If it is set to False, the floor function will be used. The default value is False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ValueError: If `padding` is a list or tuple but its length is greater than 1.
ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0.
Examples:
.. code-block:: python
import paddle
          import paddle.nn.functional as F
          import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16]
out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0)
# out shape: [1, 3, 16]
"""
"""NCL to NCHW"""
data_format = "NCHW"
check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'avg_pool1d')
check_input(x, 3)
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d')
_check_input(x, 3)
x = unsqueeze(x, [2])
kernel_size = utils.convert_to_list(kernel_size, 1, 'pool_size')
kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size')
kernel_size = [1] + kernel_size
if stride is None:
stride = kernel_size
......@@ -220,33 +226,20 @@ def avg_pool1d(x,
stride = utils.convert_to_list(stride, 1, 'pool_stride')
stride = [1] + stride
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0]
channel_last = _channel_last("NCL", 1)
padding, padding_algorithm = _update_padding_nd(
padding, 1, channel_last=channel_last, ceil_mode=ceil_mode)
padding = update_padding1d(padding, "avg")
# use 2d to implenment 1d should expand padding in advance.
padding = _expand_low_nd_padding(padding)
if in_dygraph_mode():
output = core.ops.pool2d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
False, 'strides', stride, 'paddings', padding, 'padding_algorithm',
padding_algorithm, 'use_cudnn', not count_include_pad, 'ceil_mode',
ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format',
data_format)
padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
return squeeze(output, [2])
op_type = 'pool2d'
......@@ -275,126 +268,103 @@ def avg_pool1d(x,
return squeeze(pool_out, [2])
def max_pool1d(x,
def avg_pool2d(x,
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None):
"""
Applies a 1D max pooling over an input signal composed of several input planes based
on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
.. math::
Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])}
This API implements average pooling 2d operation.
See more details in :ref:`api_nn_pooling_AvgPool2d` .
Args:
x (Tensor): The input tensor of pooling operator which is a 3-D tensor with
shape [N, C, L], where `N` is batch size, `C` is the number of channels,
`L` is the length of the feature. The data type if float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`.
return_indices (bool): Whether return the max indices along with the outputs. default is `False`.
ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
If it is set to False, the floor function will be used. Default False.
x (Tensor): The input tensor of pooling operator which is a 4-D tensor with
shape [N, C, H, W]. The format of input tensor is `"NCHW"` or
`"NHWC"`, where `N` is batch size, `C` is the number of channels,
`H` is the height of the feature, and `W` is the width of the
                          feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list,
it must contain two integers, (kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The stride size. If it is a tuple or list,
it must contain two integers, (stride_Height, stride_Width).
Otherwise, the stride size will be a square of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
            3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
        count_include_pad (bool): Whether to include padding points in the averaging
                          calculation. Default is `True`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the output's shape calculated is not greater than 0.
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16]
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
# avg pool2d
x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
out = F.avg_pool2d(x,
kernel_size=2,
stride=2, padding=0)
# out.shape [1, 3, 16, 16]
"""
"""NCL to NCHW"""
data_format = "NCHW"
check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'max_pool1d')
check_input(x, 3)
x = unsqueeze(x, [2])
kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size')
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d')
kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size')
if stride is None:
stride = kernel_size
else:
stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0]
stride = utils.convert_to_list(stride, 2, 'pool_stride')
padding = update_padding1d(padding, 'max')
channel_last = _channel_last(data_format, 2)
padding, padding_algorithm = _update_padding_nd(
padding, 2, channel_last, ceil_mode=ceil_mode)
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride,
'paddings', padding, 'padding_algorithm', padding_algorithm,
'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False,
'exclusive', True, 'data_format', data_format)
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
output = core.ops.pool2d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
False, 'padding_algorithm', padding_algorithm, 'strides', stride,
'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
_check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1]) / divisor_override
op_type = 'max_pool2d_with_index'
op_type = 'pool2d'
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=op_type,
inputs={"X": x},
outputs=outputs,
outputs={"Out": pool_out},
attrs={
"pooling_type": 'max',
"pooling_type": "avg",
"ksize": kernel_size,
"global_pooling": False,
"strides": stride,
......@@ -403,335 +373,211 @@ def max_pool1d(x,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": True,
"exclusive": not count_include_pad,
"data_format": data_format,
})
return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
if divisor_override is None:
return pool_out
else:
_check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
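# [Illustrative sketch, not part of the patch] What divisor_override changes in
# avg_pool2d: the window sum stays the same, only the kernel-area denominator is
# replaced. Minimal dygraph usage, assuming the 2.0-beta API used in this file:
import numpy as np
import paddle
import paddle.nn.functional as F

paddle.disable_static()
x = paddle.to_tensor(np.ones([1, 1, 4, 4], dtype='float32'))
plain = F.avg_pool2d(x, kernel_size=2, stride=2)                      # every value: 4 / 4 = 1.0
scaled = F.avg_pool2d(x, kernel_size=2, stride=2, divisor_override=2) # every value: 4 / 2 = 2.0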
def adaptive_avg_pool1d(x, output_size, name=None):
def avg_pool3d(x,
kernel_size,
stride=None,
padding=0,
ceil_mode=False,
count_include_pad=False,
divisor_override=None,
data_format="NCDHW",
name=None):
"""
This operation applies a 1D adaptive average pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)}
This API implements average pooling 3d operation.
See more details in :ref:`api_nn_pooling_AvgPool3d` .
Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with
shape [N, C, D, H, W], where `N` represents the batch size, `C` represents
the number of channels, `D`, `H` and `W` represent the depth, height and width of the feature respectively.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, [stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
            2. An int, which means the feature map is zero padded by size of `padding` on all sides.
            3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): ${ceil_mode_comment}
        count_include_pad (bool): Whether to include padding points in the averaging
                          calculation. Default is `False`.
        divisor_override (int|float): If specified, it will be used as the divisor; otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of adaptive average pooling result. The data type is same
as input tensor.
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
ValueError: 'output_size' should be a integer or list or tuple with length as 1.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Examples:
.. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
#
import paddle.fluid as fluid
import paddle
          import paddle.nn.functional as F
          import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_average_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16])
x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
# avg pool3d
out = paddle.nn.functional.avg_pool3d(
x,
kernel_size = 2,
stride = 2,
padding=0)
# out.shape: [1, 3, 16, 16, 16]
"""
pool_type = 'avg'
check_variable_and_dtype(x, 'input', ['float32', 'float64'],
'adaptive_pool2d')
check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_pool1d')
    check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool3d')
kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
if stride is None:
stride = kernel_size
else:
stride = utils.convert_to_list(stride, 3, 'pool_stride')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
channel_last = _channel_last(data_format, 3)
padding, padding_algorithm = _update_padding_nd(
padding, 3, channel_last=channel_last, ceil_mode=ceil_mode)
l_type = "pool2d"
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize',
pool_size, 'adaptive', True)
return squeeze(pool_out, [2])
output = core.ops.pool3d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride,
'paddings', padding, 'global_pooling', False, 'padding_algorithm',
padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
_check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
helper = LayerHelper(l_type, **locals())
op_type = "pool3d"
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
})
return squeeze(pool_out, [2])
def adaptive_max_pool1d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])}
Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of adaptive pooling result. The data type is same
as input tensor.
Raises:
ValueError: 'output_size' should be a integer or list or tuple with length as 1.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn.functional as F
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16])
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True)
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16]
"""
pool_type = 'max'
check_variable_and_dtype(x, 'input', ['float32', 'float64'],
'adaptive_max_pool1d')
check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
l_type = 'max_pool2d_with_index'
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
type=op_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
"pooling_type": 'avg',
"ksize": kernel_size,
"global_pooling": False,
"strides": stride,
"paddings": padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": not count_include_pad,
"data_format": data_format,
})
return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
if divisor_override is None:
return pool_out
else:
_check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
def max_pool2d(x,
def max_pool1d(x,
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
data_format="NCHW",
name=None):
"""
This operation applies 2D max pooling over input feature based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
stride: stride
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$
    This API implements max pooling 1d operation.
See more details in :ref:`api_nn_pooling_MaxPool1d` .
Args:
x (Tensor): The input tensor of pooling operator which is a 4-D tensor with
shape [N, C, H, W]. The format of input tensor is `"NCHW"` or
`"NHWC"`, where `N` is batch size, `C` is the number of channels,
`H` is the height of the feature, and `W` is the width of the
feature. The data type if float32 or float64.
Args:
x (Tensor): The input tensor of pooling operator which is a 3-D tensor with
shape [N, C, L], where `N` is batch size, `C` is the number of channels,
                          `L` is the length of the feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`,
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Otherwise, the pool padding size will be a square of an int.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
it must contain an integer.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An integer, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`.
ceil_mode (bool): Whether to use the ceil function to calculate the output height and width.
If it is set to False, the floor function will be used. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0.
Examples:
.. code-block:: python
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
# max pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
output = F.max_pool2d(input,
kernel_size=2,
stride=2, padding=0)
# output.shape [1, 3, 16, 16]
# for return_indices=True
output, max_indices = F.max_pool2d(input,
kernel_size=2,
stride=2,
padding=0,
return_indices=True)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16]
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
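# a minimal extra sketch (reusing `data` from above); per the padding notes in
# Args, a string form is also accepted, and 'same' keeps ceil(L / stride) steps:
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding='same')
# expected pool_out shape: [1, 3, 16]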
"""
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d')
kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size')
"""NCL to NCHW"""
data_format = "NCHW"
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d')
_check_input(x, 3)
x = unsqueeze(x, [2])
kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size')
if stride is None:
stride = kernel_size
else:
stride = utils.convert_to_list(stride, 2, 'pool_stride')
stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride')
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format))
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
padding, padding_algorithm = _update_padding_nd(
padding, 1, ceil_mode=ceil_mode)
padding = update_padding2d(padding, data_format)
# using pool2d to implement pool1d requires expanding the padding in advance.
padding = _expand_low_nd_padding(padding)
if in_dygraph_mode():
output = core.ops.max_pool2d_with_index(
pool_out = core.ops.max_pool2d_with_index(
x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride,
'paddings', padding, 'padding_algorithm', padding_algorithm,
'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False,
'exclusive', True, 'data_format', data_format)
return output if return_indices else output[0]
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
op_type = 'max_pool2d_with_index'
helper = LayerHelper(op_type, **locals())
......@@ -758,36 +604,21 @@ def max_pool2d(x,
"data_format": data_format,
})
return (pool_out, mask) if return_indices else pool_out
return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
def avg_pool2d(x,
def max_pool2d(x,
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None):
"""
This operation applies 2D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
$$
This API implements max pooling 2d operation.
See more details in :ref:`api_nn_pooling_MaxPool2d` .
Args:
x (Tensor): The input tensor of pooling operator which is a 4-D tensor with
......@@ -796,30 +627,26 @@ def avg_pool2d(x,
`H` is the height of the feature, and `W` is the width of the
feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
it must contain two integers, (kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
it must contain two integers, (stride_Height, stride_Width).
Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`,
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Otherwise, the pool padding size will be a square of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
......@@ -832,87 +659,71 @@ def avg_pool2d(x,
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
# avg pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
output = F.avg_pool2d(input,
# max pool2d
x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
out = F.max_pool2d(x,
kernel_size=2,
stride=2, padding=0)
# output.shape [1, 3, 16, 16]
# for return_indices=True
out, max_indices = F.max_pool2d(x,
kernel_size=2,
stride=2,
padding=0,
return_indices=True)
# out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
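# shape check for the calls above, using the usual pooling formula with zero
# padding and ceil_mode=False: H_out = floor((32 + 2*0 - 2) / 2) + 1 = 16 (same for W_out)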
"""
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d')
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d')
kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size')
if stride is None:
stride = kernel_size
else:
stride = utils.convert_to_list(stride, 2, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
if data_format not in ["NCHW", "NHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format))
pool_padding = update_padding2d(padding, data_format)
channel_last = True if data_format == "NHWC" else False
padding, padding_algorithm = _update_padding_nd(
padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode)
if in_dygraph_mode():
output = core.ops.pool2d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
False, 'padding_algorithm', padding_algorithm, 'strides', stride,
'paddings', pool_padding, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1]) / divisor_override
output = core.ops.max_pool2d_with_index(
x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride,
'paddings', padding, 'padding_algorithm', padding_algorithm,
'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False,
'exclusive', True, 'data_format', data_format)
return output if return_indices else output[0]
op_type = 'pool2d'
op_type = 'max_pool2d_with_index'
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=op_type,
inputs={"X": x},
outputs={"Out": pool_out},
outputs=outputs,
attrs={
"pooling_type": "avg",
"pooling_type": 'max',
"ksize": kernel_size,
"global_pooling": False,
"strides": stride,
"paddings": pool_padding,
"paddings": padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": not count_include_pad,
"exclusive": True,
"data_format": data_format,
})
if divisor_override is None:
return pool_out
else:
check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
return (pool_out, mask) if return_indices else pool_out
def max_pool3d(x,
......@@ -924,47 +735,25 @@ def max_pool3d(x,
data_format="NCDHW",
name=None):
"""
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, D_{in}, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
$$
\text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, ksize[0]-1} \max_{m=0, \ldots, ksize[1]-1} \max_{n=0, \ldots, ksize[2]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
\text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
$$
This API implements max pooling 3d operation.
See more details in :ref:`api_nn_pooling_MaxPool3d` .
Args:
x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with
shape [N, C, D, H, W]. The format of
input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is
the number of channels, `D` is the depth of the feature,
`H` is the height of the feature, and `W` is the width
of the feature.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
shape [N, C, D, H, W]. The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively.
kernel_size (int|list|tuple): The pool kernel size. If the kernel size
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
(kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, [stride_Depth, stride_Height, stride_Width].
Otherwise, the pool stride size will be a cube of an int.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): ${ceil_mode_comment}
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
......@@ -973,7 +762,6 @@ def max_pool3d(x,
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises:
......@@ -986,23 +774,20 @@ def max_pool3d(x,
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
# max pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output = F.max_pool2d(input,
x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output = F.max_pool3d(x,
kernel_size=2,
stride=2, padding=0)
# output.shape [1, 3, 16, 16, 16]
# for return_indices=True
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output, max_indices = paddle.nn.functional.max_pool3d(input,
x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output, max_indices = paddle.nn.functional.max_pool3d(x,
kernel_size = 2,
stride = 2,
padding=0,
return_indices=True)
# output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16],
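# shape check for the calls above (zero padding, ceil_mode=False):
# D_out = H_out = W_out = floor((32 - 2) / 2) + 1 = 16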
"""
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d')
kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
......@@ -1011,29 +796,10 @@ def max_pool3d(x,
else:
stride = utils.convert_to_list(stride, 3, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0, 0]
channel_last = _channel_last(data_format, 3)
if data_format not in ["NCDHW", "NDHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
padding = update_padding3d(padding, data_format)
padding, padding_algorithm = _update_padding_nd(
padding, 3, channel_last=channel_last, ceil_mode=ceil_mode)
if in_dygraph_mode():
output = core.ops.max_pool3d_with_index(
......@@ -1071,170 +837,83 @@ def max_pool3d(x,
return (pool_out, mask) if return_indices else pool_out
def avg_pool3d(x,
kernel_size,
stride=None,
padding=0,
ceil_mode=False,
count_include_pad=False,
divisor_override=None,
data_format="NCDHW",
name=None):
def adaptive_avg_pool1d(x, output_size, name=None):
"""
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
This API implements adaptive average pooling 1d operation.
See more details in :ref:`api_nn_pooling_AdaptiveAvgPool1d` .
Args:
input (Tensor): The input tensor of pooling operator, which is a 5-D tensor with
shape [N, C, D, H, W], where `N` is batch size, `C` is
the number of channels, `D` is the depth of the feature,
`H` is the height of the feature, and `W` is the width
of the feature.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor.
Tensor: The output tensor of adaptive average pooling result. The data type is same
as input tensor.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
ValueError: 'output_size' should be an integer or a list or tuple of length 1.
Examples:
.. code-block:: python
import paddle.fluid as fluid
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend]) / (lend - lstart)
#
import paddle
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
# avg pool3d
pool3d = paddle.nn.functional.avg_pool3d(
input,
kernel_size = 2,
stride = 2,
padding=0)
# pool3d.shape: [1, 3, 16, 16, 16]
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_avg_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16]
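# worked bin boundaries for the call above (L = 32, output_size = 16):
# lstart = floor(i * 32 / 16) = 2*i, lend = ceil((i + 1) * 32 / 16) = 2*i + 2,
# so output[:, :, i] is the mean of input[:, :, 2*i : 2*i + 2]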
"""
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d')
kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
if stride is None:
stride = kernel_size
else:
stride = utils.convert_to_list(stride, 3, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0, 0]
pool_type = 'avg'
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'adaptive_pool2d')
_check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_pool1d')
if data_format not in ["NCDHW", "NDHWC"]:
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
padding = update_padding3d(padding, data_format)
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
l_type = "pool2d"
x = unsqueeze(x, [2])
if in_dygraph_mode():
output = core.ops.pool3d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride,
'paddings', padding, 'global_pooling', False, 'padding_algorithm',
padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize',
pool_size, 'adaptive', True)
return squeeze(pool_out, [2])
op_type = "pool3d"
helper = LayerHelper(op_type, **locals())
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out}
outputs = {"Out": pool_out}
helper.append_op(
type=op_type,
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": 'avg',
"ksize": kernel_size,
"global_pooling": False,
"strides": stride,
"paddings": padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": not count_include_pad,
"data_format": data_format,
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
})
if divisor_override is None:
return pool_out
else:
check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
return squeeze(pool_out, [2])
def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
"""
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool2d` .
For avg adaptive pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
This API implements adaptive average pooling 2d operation.
See more details in :ref:`api_nn_pooling_AdaptiveAvgPool2d` .
Args:
x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor.
......@@ -1248,16 +927,12 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor.
Raises:
ValueError: If `data_format` is not "NCHW" or "NHWC".
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
......@@ -1279,10 +954,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
pool_out = paddle.nn.functional.adaptive_avg_pool2d(
out = paddle.nn.functional.adaptive_avg_pool2d(
x = x,
output_size=[3, 3])
# pool_out.shape is [2, 3, 3, 3]
# out.shape is [2, 3, 3, 3]
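# worked bin boundaries for the call above (H_in = W_in = 32, output_size = [3, 3]):
# the first output row/column averages x[:, :, 0:ceil(32/3), 0:ceil(32/3)] = x[:, :, 0:11, 0:11]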
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
......@@ -1337,28 +1012,8 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
"""
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool3d` .
For avg adaptive pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
This API implements adaptive average pooling 3d operation.
See more details in :ref:`api_nn_pooling_AdaptiveAvgPool3d` .
Args:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor.
......@@ -1372,16 +1027,12 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor.
Raises:
ValueError: If `data_format` is not "NCDHW" or "NDHWC".
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
......@@ -1406,10 +1057,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
pool_out = paddle.nn.functional.adaptive_avg_pool3d(
out = paddle.nn.functional.adaptive_avg_pool3d(
x = x,
output_size=[3, 3, 3])
# pool_out.shape is [2, 3, 3, 3, 3]
# out.shape is [2, 3, 3, 3, 3]
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
......@@ -1461,3 +1112,257 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
})
return pool_out
def adaptive_max_pool1d(x, output_size, return_indices=False, name=None):
"""
This API implements adaptive max pooling 1d operation.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool1d` .
Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of adaptive pooling result. The data type is same
as input tensor.
Raises:
ValueError: 'output_size' should be an integer or a list or tuple of length 1.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn.functional as F
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16]
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True)
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16]
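# worked bin boundaries for the call above (L = 32, output_size = 16):
# lstart = floor(i * 32 / 16) = 2*i, lend = ceil((i + 1) * 32 / 16) = 2*i + 2,
# so each output step takes the max over input[:, :, 2*i : 2*i + 2]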
"""
pool_type = 'max'
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool1d')
_check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
l_type = 'max_pool2d_with_index'
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
})
return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
def adaptive_max_pool2d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 2D adaptive max pooling on input tensor.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` .
Args:
x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
Returns:
Tensor: The output tensor of adaptive max pool2d result. The data type is same as input tensor.
Examples:
.. code-block:: python
# max adaptive pool2d
# suppose input data in the shape of [N, C, H, W], `output_size` is [m, n]
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m*n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(j * W / n)
# wend = ceil((j + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
out = paddle.nn.functional.adaptive_max_pool2d(
x = x,
output_size=[3, 3])
# out.shape is [2, 3, 3, 3]
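# a minimal extra sketch (reusing `x` from above); in this dygraph example,
# return_indices=True additionally returns the argmax locations:
out, indices = paddle.nn.functional.adaptive_max_pool2d(
x = x,
output_size=[3, 3],
return_indices=True)
# out.shape is [2, 3, 3, 3], indices.shape is [2, 3, 3, 3]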
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool2d')
_check_input(x, 4)
#check_type(output_size, 'pool_size', (int), 'adaptive_max_pool2d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool2d')
in_h, in_w = x.shape[2:4]
if isinstance(output_size, int):
output_size = utils.convert_to_list(output_size, 2, 'output_size')
else:
if output_size[0] == None:
output_size[0] = in_h
if output_size[1] == None:
output_size[1] = in_w
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
return pool_out if return_indices else pool_out[0]
l_type = 'max_pool2d_with_index'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
})
#return (pool_out, mask) if return_indices else pool_out
return pool_out
def adaptive_max_pool3d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 3D adaptive max pooling on input tensor.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` .
Args:
x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
Returns:
Tensor: The output tensor of adaptive max pool3d result. The data type is same as input tensor.
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n]
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into m*n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
out = paddle.nn.functional.adaptive_max_pool3d(
x = x,
output_size=[3, 3, 3])
# out.shape is [2, 3, 3, 3, 3]
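# a minimal extra sketch (reusing `x` from above); per the Args note, None entries
# in output_size keep the corresponding input size, e.g. pooling over depth only:
out = paddle.nn.functional.adaptive_max_pool3d(
x = x,
output_size=[3, None, None])
# out.shape is [2, 3, 3, 32, 32]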
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool3d')
_check_input(x, 5)
#check_type(output_size, 'pool_size', (int), 'adaptive_max_pool3d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool3d')
in_l, in_h, in_w = x.shape[2:5]
if isinstance(output_size, int):
output_size = utils.convert_to_list(output_size, 3, 'output_size')
else:
if output_size[0] == None:
output_size[0] = in_l
if output_size[1] == None:
output_size[1] = in_h
if output_size[2] == None:
output_size[2] = in_w
if in_dygraph_mode():
pool_out = core.ops.max_pool3d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
return pool_out if return_indices else pool_out[0]
l_type = 'max_pool3d_with_index'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
})
return (pool_out, mask) if return_indices else pool_out
......@@ -66,16 +66,18 @@ from .common import Dropout #DEFINE_ALIAS
from .common import Dropout2D #DEFINE_ALIAS
from .common import Dropout3D #DEFINE_ALIAS
from .common import AlphaDropout #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .pooling import AvgPool1d #DEFINE_ALIAS
from .pooling import MaxPool1d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .pooling import AvgPool2d #DEFINE_ALIAS
from .pooling import MaxPool2d #DEFINE_ALIAS
from .pooling import AvgPool3d #DEFINE_ALIAS
from .pooling import MaxPool1d #DEFINE_ALIAS
from .pooling import MaxPool2d #DEFINE_ALIAS
from .pooling import MaxPool3d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool2d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool3d #DEFINE_ALIAS
from .conv import Conv1d #DEFINE_ALIAS
from .conv import Conv2d #DEFINE_ALIAS
from .conv import Conv3d #DEFINE_ALIAS
......
......@@ -99,7 +99,8 @@ class _ConvNd(layers.Layer):
raise ValueError("in_channels must be divisible by groups.")
if padding_mode in {'reflect', 'replicate', 'circular'}:
_paired_padding = utils.convert_to_list(padding, 2, 'padding')
_paired_padding = utils.convert_to_list(padding, dims,
'padding')
self._reversed_padding_repeated_twice = _reverse_repeat_list(
_paired_padding, 2)
......@@ -318,62 +319,80 @@ class Conv2d(_ConvNd):
output of the convolution, and the corresponding activation function is
applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \\sigma (W \\ast X + b)
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by convolution.
kernel_size (int|list|tuple): The size of convolution kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding` on both sides
2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` .
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1.
groups (int, optional): The groups number of the Conv2d Layer. According to grouped
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. The default value is 1.
groups(int, optional): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
connected to the second half of the input channels. The default value is 1.
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d.
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
is not set, the bias is initialized zero. The default value is None.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel_size[0] - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel_size[1] - 1) + 1))}{strides[1]} + 1
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
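For example, with :math:`H_{in} = W_{in} = 32`, kernel_size = 3, stride = 1, padding = 1 and
dilation = 1, the formulas above give :math:`H_{out} = W_{out} = (32 + 2 - 3) / 1 + 1 = 32`,
i.e. the spatial size is preserved.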
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -646,35 +665,29 @@ class ConvTranspose2d(_ConvNd):
The details of convolution transpose layer, please refer to the following explanation and references
`conv2dtranspose <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_ .
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution.
kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple,
it must contain two integers, (kernel_size_H, kernel_size_W).
Otherwise, the kernel will be a square.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding` on both sides
......@@ -682,9 +695,8 @@ class ConvTranspose2d(_ConvNd):
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1.
......@@ -694,29 +706,46 @@ class ConvTranspose2d(_ConvNd):
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
Default: 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
weight_attr(ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose.
bias_attr(ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter or None): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] )
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
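For example, with :math:`H_{in} = 32`, kernel_size = 4, stride = 2, padding = 1 and dilation = 1,
the formulas above give :math:`H^\prime_{out} = (32 - 1) * 2 - 2 * 1 + 1 * (4 - 1) + 1 = 64`;
with the default output_padding of 0 this yields :math:`H_{out} = 64`.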
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -791,66 +820,86 @@ class Conv3d(_ConvNd):
provided, bias is added to the output of the convolution, and the
corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters:
in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution.
kernel_size (int|list|tuple, optional): The size of the convolving kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must
kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. The default value is 1.
padding (int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (depth, height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0.
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
groups (int, optional): The groups number of the Conv3d Layer. According to grouped
groups(int, optional): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. The default value is None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (D_f - 1) + 1))}{strides[0]} + 1 \\\\
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (H_f - 1) + 1))}{strides[1]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (W_f - 1) + 1))}{strides[2]} + 1
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (kernel\_size[2] - 1) + 1))}{strides[2]} + 1
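As a quick reading aid, the output-size rule above can be evaluated directly; a minimal illustrative sketch with arbitrary example values:
.. code-block:: python
# Illustrative helper mirroring the Conv3d output-size formula above;
# floor division reproduces the implicit floor in the equation.
def conv_out_size(in_size, padding, dilation, kernel_size, stride):
    return (in_size + 2 * padding - (dilation * (kernel_size - 1) + 1)) // stride + 1
# e.g. D_in = 8, paddings[0] = 1, dilations[0] = 1, kernel_size[0] = 3, strides[0] = 2
print(conv_out_size(8, 1, 1, 3, 2))  # 4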
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
import numpy as np
import paddle
......@@ -936,17 +985,22 @@ class ConvTranspose3d(_ConvNd):
the output of the convolution, and the corresponding activation function
is applied to the final result.
For each input :math:`X`, the equation is:
.. math::
Out = \sigma (W \\ast X + b)
In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
**Note**:
The conv_transpose3d can be seen as the backward of the conv3d. For conv3d,
when stride > 1, conv3d maps multiple input shape to the same output shape,
so for conv_transpose3d, when stride > 1, input shape maps multiple output shape.
......@@ -957,6 +1011,7 @@ class ConvTranspose3d(_ConvNd):
and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must
between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`,
conv_transpose3d can compute the kernel size automatically.
Parameters:
in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution.
......@@ -985,11 +1040,11 @@ class ConvTranspose3d(_ConvNd):
first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels.
The default value is 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr
......@@ -999,24 +1054,38 @@ class ConvTranspose3d(_ConvNd):
filter_size, padding, and stride to calculate output_size.
if output_size and filter_size are specified at the same time, They
should follow the formula above. Default: None.
data_format (str, optional): Data format that specifies the layout of input.
data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute:
**weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer.
Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where
.. math::
D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel_size[0] - 1) + 1 \\\\
H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel_size[1] - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel_size[2] - 1) + 1 \\\\
D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel\_size[2] - 1) + 1
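A matching illustrative sketch for the transposed-convolution rule above (arbitrary example values); it gives the smallest admissible output size before any explicit output_size is applied:
.. code-block:: python
# Illustrative helper mirroring the conv-transpose output-size formula above.
def conv_transpose_out_size(in_size, padding, dilation, kernel_size, stride):
    return (in_size - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1
# e.g. D_in = 4, paddings[0] = 1, dilations[0] = 1, kernel_size[0] = 3, strides[0] = 2
print(conv_transpose_out_size(4, 1, 1, 3, 2))  # 7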
Raises:
ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch.
Examples:
.. code-block:: python
import numpy as np
import paddle
import paddle.nn as nn
......@@ -1024,7 +1093,7 @@ class ConvTranspose3d(_ConvNd):
paddle.disable_static()
x_var = paddle.to_tensor(x)
conv = nn.Conv3DTranspose(4, 6, (3, 3, 3))
conv = nn.ConvTranspose3d(4, 6, (3, 3, 3))
y_var = conv(x_var)
y_np = y_var.numpy()
print(y_np.shape)
......
......@@ -634,9 +634,12 @@ class KLDivLoss(fluid.dygraph.Layer):
Default is ``'mean'``.
Shape:
- input: (N, *) where * means, any number of additional dimensions.
- label: (N, *), same shape as input
- output: tensor with shape: (1) by default.
- input (Tensor): (N, *), where * means any number of additional dimensions.
- label (Tensor): (N, *), same shape as input.
- output (Tensor): tensor with shape: [1] by default.
Examples:
......@@ -646,7 +649,7 @@ class KLDivLoss(fluid.dygraph.Layer):
import numpy as np
import paddle.nn as nn
paddle.enable_imperative()
paddle.disable_static()
shape = (5, 20)
x = np.random.uniform(-10, 10, shape).astype('float32')
......@@ -654,26 +657,26 @@ class KLDivLoss(fluid.dygraph.Layer):
# 'batchmean' reduction, loss shape will be [N]
kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[5]
# 'mean' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='mean')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[1]
# 'sum' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='sum')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[1]
# 'none' reduction, loss shape is same with X shape
kldiv_criterion = nn.KLDivLoss(reduction='none')
pred_loss = kldiv_criterion(paddle.to_variable(x),
paddle.to_variable(target))
pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_tensor(target))
# shape=[5, 20]
"""
......
......@@ -27,6 +27,7 @@
# TODO: define normalization api
import six
from ...fluid.dygraph.nn import InstanceNorm
from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS
......@@ -36,7 +37,6 @@ from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS
from ...fluid.dygraph import SpectralNorm #DEFINE_ALIAS
from ...fluid.dygraph import layers
from ...framework import get_default_dtype, set_default_dtype
from ...fluid.framework import in_dygraph_mode
......@@ -50,6 +50,7 @@ from ..functional import batch_norm, layer_norm, instance_norm
import numpy as np
import numbers
import warnings
from ...fluid.dygraph.base import no_grad
__all__ = [
'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'InstanceNorm',
......@@ -566,17 +567,28 @@ class _BatchNormBase(layers.Layer):
param_shape = [num_features]
# create parameter
if weight_attr == False:
self.weight = self.create_parameter(
attr=None, shape=param_shape, default_initializer=Constant(1.0))
self.weight.stop_gradient = True
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=param_shape,
default_initializer=Constant(1.0))
self.weight.stop_gradient = (self._weight_attr is False) or (
self._weight_attr and self._weight_attr.learning_rate == 0.)
self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
if bias_attr == False:
self.bias = self.create_parameter(
attr=None,
shape=param_shape,
default_initializer=Constant(0.0),
is_bias=True)
self.bias.stop_gradient = True
else:
self.bias = self.create_parameter(
attr=self._bias_attr, shape=param_shape, is_bias=True)
self.bias.stop_gradient = (self._bias_attr is False) or (
self._bias_attr and self._bias_attr.learning_rate == 0.)
self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0.
moving_mean_name = None
moving_variance_name = None
......@@ -611,6 +623,7 @@ class _BatchNormBase(layers.Layer):
self._epsilon = epsilon
self._fuse_with_relu = False
self._track_running_stats = track_running_stats
self._name = name
def _check_input_dim(self, input):
raise NotImplementedError("BatchNorm Base error")
......@@ -898,7 +911,7 @@ class BatchNorm3d(_BatchNormBase):
len(input.shape)))
class SyncBatchNorm(layers.Layer):
class SyncBatchNorm(_BatchNormBase):
"""
This interface is used to construct a callable object of the ``SyncBatchNorm`` class.
It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can
......@@ -984,72 +997,16 @@ class SyncBatchNorm(layers.Layer):
def __init__(self,
num_features,
epsilon=1e-05,
momentum=0.9,
track_running_stats=True,
epsilon=1e-05,
weight_attr=None,
bias_attr=None,
data_format='NCHW',
track_running_stats=True,
name=None):
super(SyncBatchNorm, self).__init__()
self._weight_attr = weight_attr
self._bias_attr = bias_attr
self._num_features = num_features
self._data_layout = data_format
self._momentum = momentum
self._epsilon = epsilon
self._track_running_stats = track_running_stats
if self._track_running_stats == False:
warnings.warn(
"moving mean and moving variance will be calculated whether `track_running_stats` is set to `True` or `False`, we will fix it in the next version."
)
param_shape = [self._num_features]
# create parameter
if weight_attr == False:
self.weight = self.create_parameter(
attr=None, shape=param_shape, default_initializer=Constant(1.0))
self.weight.stop_gradient = True
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=param_shape,
default_initializer=Constant(1.0))
self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
if bias_attr == False:
self.bias = self.create_parameter(
attr=None,
shape=param_shape,
default_initializer=Constant(0.0),
is_bias=True)
self.bias.stop_gradient = True
else:
self.bias = self.create_parameter(
attr=self._bias_attr, shape=param_shape, is_bias=True)
self.bias.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
self._mean = self.create_parameter(
attr=ParamAttr(
name=None,
initializer=Constant(0.0),
trainable=False,
do_model_average=True),
shape=param_shape,
dtype=self._dtype)
self._mean.stop_gradient = True
self._variance = self.create_parameter(
attr=ParamAttr(
name=None,
initializer=Constant(1.0),
trainable=False,
do_model_average=True),
shape=param_shape,
dtype=self._dtype)
self._variance.stop_gradient = True
super(SyncBatchNorm,
self).__init__(num_features, momentum, epsilon, weight_attr,
bias_attr, data_format, track_running_stats, name)
def forward(self, x):
# create output
......@@ -1063,7 +1020,7 @@ class SyncBatchNorm(layers.Layer):
if in_dygraph_mode():
attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
"is_test", not self.training, "data_layout",
self._data_layout, "use_mkldnn", False, "fuse_with_relu",
self._data_format, "use_mkldnn", False, "fuse_with_relu",
False, "use_global_stats", False, 'trainable_statistics',
False)
sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm(
......@@ -1073,13 +1030,13 @@ class SyncBatchNorm(layers.Layer):
return sync_batch_norm_out
check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'],
'BatchNorm')
'SyncBatchNorm')
attrs = {
"momentum": self._momentum,
"epsilon": self._epsilon,
"is_test": not self.training,
"data_layout": self._data_layout,
"data_layout": self._data_format,
"use_mkldnn": False,
"fuse_with_relu": False,
"use_global_stats": False,
......@@ -1112,3 +1069,45 @@ class SyncBatchNorm(layers.Layer):
self._helper.append_op(
type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
return sync_batch_norm_out
@classmethod
def convert_sync_batchnorm(cls, layer):
"""
Helper function to convert :class: `paddle.nn.BatchNorm*d` layers in the model to :class: `paddle.nn.SyncBatchNorm` layers.
Parameters:
layer(paddle.nn.Layer): model containing one or more `BatchNorm*d` layers.
Returns:
The original model with its `BatchNorm*d` layers replaced by `SyncBatchNorm` layers.
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
paddle.disable_static()
model = nn.Sequential(nn.Conv2d(3, 5, 3), nn.BatchNorm2d(5))
sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
"""
layer_output = layer
if isinstance(layer, _BatchNormBase):
layer_output = SyncBatchNorm(layer._num_features, layer._epsilon,
layer._momentum, layer._weight_attr,
layer._bias_attr, layer._data_format,
layer._name)
if layer._weight_attr != False and layer._bias_attr != False:
with no_grad():
layer_output.weight = layer.weight
layer_output.bias = layer.bias
layer_output._mean = layer._mean
layer_output._variance = layer._variance
for name, sublayer in layer.named_sublayers():
layer_output.add_sublayer(name,
cls.convert_sync_batchnorm(sublayer))
del layer
return layer_output
......@@ -12,198 +12,26 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
from ...fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
from ...fluid.layers import utils
from ...fluid.dygraph import layers
from ...fluid.layer_helper import LayerHelper
from .. import functional as F
__all__ = [
'AdaptiveAvgPool2d',
'AdaptiveAvgPool3d',
'AvgPool1d',
'maxPool1d',
'AdaptiveMaxPool1d',
'AdaptiveAvgPool1d',
'AvgPool2d',
'MaxPool2d',
'AvgPool3d',
'MaxPool1d',
'MaxPool2d',
'MaxPool3d',
'AdaptiveAvgPool1d',
'AdaptiveAvgPool2d',
'AdaptiveAvgPool3d',
'AdaptiveMaxPool1d',
'AdaptiveMaxPool2d',
'AdaptiveMaxPool3d',
]
class AdaptiveAvgPool2d(layers.Layer):
"""
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two element, (H, W). H and W can be either a int, or None which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
the order of: [batch_size, input_channels, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32 or float64.
output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool2d.
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCHW", name=None):
super(AdaptiveAvgPool2d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool2d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
class AdaptiveAvgPool3d(layers.Layer):
"""
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
the order of: [batch_size, input_channels, input_depth, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32 or float64.
output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool3d.
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out = [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCDHW", name=None):
super(AdaptiveAvgPool3d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool3d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
class AvgPool1d(layers.Layer):
"""
This operation applies a 1D average pooling over an input signal composed
......@@ -223,17 +51,20 @@ class AvgPool1d(layers.Layer):
Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers.
it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero,
then the input is implicitly zero-padded on both sides for padding number of points.
it must contain an integer.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
mode, default is `True`.
ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. Default False
If it is set to False, the floor function will be used. The default value is False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
......@@ -245,10 +76,14 @@ class AvgPool1d(layers.Layer):
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- input: 3-D tensor.
- output: 3-D tensor.
Examples:
.. code-block:: python
......@@ -284,63 +119,74 @@ class AvgPool1d(layers.Layer):
return out
class MaxPool1d(layers.Layer):
class AvgPool2d(layers.Layer):
"""
Applies a 1D max pooling over an input signal composed of several input planes based
on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
This operation applies 2D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
.. math::
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
$$
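A minimal numpy sketch (illustrative only, not the Paddle kernel) of the window average above for a single channel, assuming stride equal to kernel_size and no padding:
.. code-block:: python
import numpy as np
x = np.arange(16, dtype=np.float32).reshape(4, 4)   # one channel of one sample
kh = kw = 2
out = np.array([[x[h * kh:h * kh + kh, w * kw:w * kw + kw].mean()
                 for w in range(x.shape[1] // kw)]
                for h in range(x.shape[0] // kh)])
print(out)   # [[ 2.5  4.5]
             #  [10.5 12.5]]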
Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers.
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`.
return_indices (bool): Whether return the max indices along with the outputs. default is `False`.
ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
If it is set to False, the floor function will be used. Default False
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `True`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
None.
Shape:
- x: 4-D tensor.
- out: 4-D tensor.
Returns: None.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the output's shape calculated is not greater than 0.
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
pool_out = MaxPool1d(data)
# pool_out shape: [1, 3, 16]
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True)
pool_out, indices = MaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
# avg pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
AvgPool2d = nn.AvgPool2d(kernel_size=2,
stride=2, padding=0)
output = AvgPool2d(input)
# output.shape [1, 3, 16, 16]
"""
......@@ -348,113 +194,155 @@ class MaxPool1d(layers.Layer):
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None):
super(MaxPool1d, self).__init__()
self.kernel_size = kernel_size
super(AvgPool2d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.return_indices = return_indices
self.name = name
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format
self.name = name
def forward(self, input):
out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
self.return_indices, self.ceil_mode, self.name)
return out
def forward(self, x):
return F.avg_pool2d(
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
data_format=self.data_format,
name=self.name)
class AdaptiveAvgPool1d(layers.Layer):
class AvgPool3d(layers.Layer):
"""
This operation applies a 1D adaptive average pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)}
This operation applies 3D average pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Args:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, (stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
divisor_override (int|float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
None.
Returns: None.
Raises:
ValueError: 'pool_size' should be an integer or list or tuple with length as 1.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 5-D tensor.
- out: 5-D tensor.
Examples:
.. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
pool_out = AdaptiveAvgPool1d(data)
# pool_out shape: [1, 3, 16]
# avg pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
AvgPool3d = nn.AvgPool3d(kernel_size=2,
stride=2, padding=0)
output = AvgPool3d(input)
# output.shape [1, 2, 3, 16, 16]
"""
def __init__(self, output_size, name=None):
super(AdaptiveAvgPool1d, self).__init__()
self.output_size = output_size
def __init__(self,
kernel_size,
stride,
padding=0,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCDHW",
name=None):
super(AvgPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format
self.name = name
def forward(self, input):
return F.adaptive_avg_pool1d(input, self.output_size, self.name)
def forward(self, x):
return F.avg_pool3d(
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
data_format=self.data_format,
name=self.name)
class AdaptiveMaxPool1d(layers.Layer):
class MaxPool1d(layers.Layer):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Applies a 1D max pooling over an input signal composed of several input planes based
on the input and the kernel_size, stride, padding, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d:
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For max pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])
Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])
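An illustrative numpy sketch of the rule above for one channel, with arbitrary example values and 'valid' padding:
.. code-block:: python
import numpy as np
x = np.arange(8, dtype=np.float32)   # one channel of one sample
k, stride = 2, 2
L_out = (len(x) - k) // stride + 1
out = np.array([x[stride * l:stride * l + k].max() for l in range(L_out)])
print(out)   # [1. 3. 5. 7.]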
Args:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain an integer.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An integer, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`.
ceil_mode (bool): Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. Default is `False`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
......@@ -462,53 +350,60 @@ class AdaptiveMaxPool1d(layers.Layer):
None.
Raises:
ValueError: 'pool_size' should be an integer or list or tuple with length as 1.
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 3-D tensor.
- out: 3-D tensor.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
pool_out = AdaptiveMaxPool1d(data)
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
pool_out = MaxPool1d(data)
# pool_out shape: [1, 3, 16]
# for return_indices = true
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True)
pool_out, indices = AdaptiveMaxPool1d(data)
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True)
pool_out, indices = MaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool1d, self).__init__()
self.output_size = output_size
def __init__(self,
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
name=None):
super(MaxPool1d, self).__init__()
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.return_indices = return_indices
self.name = name
def forward(self, input):
return F.adaptive_max_pool1d(input, self.output_size,
self.return_indices, self.name)
out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
self.return_indices, self.ceil_mode, self.name)
return out
class AvgPool2d(layers.Layer):
class MaxPool2d(layers.Layer):
"""
This operation applies 2D average pooling over input features based on the input,
This operation applies 2D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
......@@ -522,8 +417,9 @@ class AvgPool2d(layers.Layer):
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$
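Analogously, an illustrative numpy sketch (not the Paddle kernel) of the max-pool window rule above for one channel, with stride equal to kernel_size and no padding:
.. code-block:: python
import numpy as np
x = np.arange(16, dtype=np.float32).reshape(4, 4)
kh = kw = 2
out = np.array([[x[h * kh:h * kh + kh, w * kw:w * kw + kw].max()
                 for w in range(x.shape[1] // kw)]
                for h in range(x.shape[0] // kh)])
print(out)   # [[ 5.  7.]
             #  [13. 15.]]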
Args:
......@@ -532,31 +428,33 @@ class AvgPool2d(layers.Layer):
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int. Default: kernel_size.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`,
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Otherwise, the pool padding size will be a square of an int.
Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns: None.
Returns: None
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 4-D tensor.
- out: 4-D tensor.
Examples:
.. code-block:: python
import paddle
......@@ -566,95 +464,87 @@ class AvgPool2d(layers.Layer):
# max pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
AvgPool2d = nn.AvgPool2d(kernel_size=2,
MaxPool2d = nn.MaxPool2d(kernel_size=2,
stride=2, padding=0)
output = AvgPoo2d(input)
output = MaxPool2d(input)
# output.shape [1, 3, 16, 16]
# for return_indices=True
MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool2d(input)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
"""
def __init__(self,
kernel_size,
stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None):
super(AvgPool2d, self).__init__()
super(MaxPool2d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.return_indices = return_indices
self.ceil_mode = ceil_mode
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format
self.name = name
def forward(self, x):
return F.avg_pool2d(
return F.max_pool2d(
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
return_indices=self.return_indices,
data_format=self.data_format,
name=self.name)
class MaxPool2d(layers.Layer):
class MaxPool3d(layers.Layer):
"""
This operation applies 2D max pooling over input feature based on the input,
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
kernel_size (int|list|tuple): The pool kernel size. If the kernel size
is a tuple or list, it must contain three integers,
(kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int. Default: kernel_size.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be in three forms: `[pad_height, pad_width]` or
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`,
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Otherwise, the pool padding size will be a square of an int.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
it must contain three integers, (stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): ${ceil_mode_comment}
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns: None
Returns: None.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 5-D tensor.
- out: 5-D tensor.
Examples:
.. code-block:: python
import paddle
......@@ -662,28 +552,28 @@ class MaxPool2d(layers.Layer):
import numpy as np
paddle.disable_static()
# max pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
MaxPool2d = nn.MaxPool2d(kernel_size=2,
# max pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
MaxPool3d = nn.MaxPool3d(kernel_size=2,
stride=2, padding=0)
output = MaxPool2d(input)
# output.shape [1, 3, 16, 16]
output = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16]
# for return_indices=True
MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool2d(input)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
"""
def __init__(self,
kernel_size,
stride=None,
padding=0,
stride,
padding,
return_indices=False,
ceil_mode=False,
data_format="NCHW",
data_format="NCDHW",
name=None):
super(MaxPool2d, self).__init__()
super(MaxPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
......@@ -693,7 +583,7 @@ class MaxPool2d(layers.Layer):
self.name = name
def forward(self, x):
return F.max_pool2d(
return F.max_pool3d(
x,
kernel_size=self.ksize,
stride=self.stride,
......@@ -703,175 +593,457 @@ class MaxPool2d(layers.Layer):
name=self.name)
class MaxPool3d(layers.Layer):
class AdaptiveAvgPool1d(layers.Layer):
"""
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
This operation applies a 1D adaptive average pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lend - lstart)}
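A rough numpy sketch of the adaptive rule above, with illustrative values rather than the Paddle implementation:
.. code-block:: python
import math
import numpy as np
x = np.arange(32, dtype=np.float32)   # one channel of one sample, L_in = 32
L_in, L_out = len(x), 16
out = np.array([x[math.floor(i * L_in / L_out):math.ceil((i + 1) * L_in / L_out)].mean()
                for i in range(L_out)])
print(out.shape)   # (16,)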
Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int. Default kernel_size.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
ceil_mode (bool): when True, will use ceil instead of floor to compute the output shape.
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
None.
Returns: None.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
ValueError: 'pool_size' should be an integer or list or tuple with length as 1.
Shape:
- x: 3-D tensor.
- out: 3-D tensor.
Examples:
.. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
# max pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
MaxPool3d = nn.MaxPool3d(kernel_size=2,
stride=2, padding=0)
output = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16]
# for return_indices=True
MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
pool_out = AdaptiveAvgPool1d(data)
# pool_out shape: [1, 3, 16]
"""
def __init__(self,
kernel_size,
stride,
padding,
return_indices=False,
ceil_mode=False,
data_format="NCDHW",
name=None):
super(MaxPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.return_indices = return_indices
self.ceil_mode = ceil_mode
self.data_format = data_format
def __init__(self, output_size, name=None):
super(AdaptiveAvgPool1d, self).__init__()
self.output_size = output_size
self.name = name
def forward(self, x):
return F.max_pool3d(
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
return_indices=self.return_indices,
data_format=self.data_format,
name=self.name)
def forward(self, input):
return F.adaptive_avg_pool1d(input, self.output_size, self.name)
class AvgPool3d(layers.Layer):
class AdaptiveAvgPool2d(layers.Layer):
"""
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If the pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The pool padding. If `padding` is a string, it must be
either 'VALID' or 'SAME', which is the padding algorithm. If the pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
When `data_format` is `"NDHWC"`, `padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to include padding points in the average calculation.
Default is True.
divisor_override (int|float, optional): If specified, it will be used as the divisor;
otherwise kernel_size will be used. Default is None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
This operation applies 2D adaptive avg pooling on the input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i, j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The target output size. If it is a tuple or list,
it must contain two elements, (H, W). H and W can be either an int, or None, which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
the order of: [batch_size, input_channels, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Returns: None.
Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool2d.
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data is in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n]; adaptive pooling evenly divides the H and W dimensions
# of the input into m * n grids and performs average pooling in each
# grid to get the output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(m):
#     for j in range(n):
#         hstart = floor(i * H / m)
#         hend = ceil((i + 1) * H / m)
#         wstart = floor(j * W / n)
#         wend = ceil((j + 1) * W / n)
#         output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCHW", name=None):
super(AdaptiveAvgPool2d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool2d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
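# A small NumPy sketch of the hstart/hend, wstart/wend formula from the
# AdaptiveAvgPool2d docstring above; `adaptive_avg_pool2d_ref` is a
# hypothetical reference helper, not a Paddle function.
import math
import numpy as np

def adaptive_avg_pool2d_ref(x, out_h, out_w):
    # x: ndarray of shape [N, C, H, W] (NCHW)
    N, C, H, W = x.shape
    out = np.empty((N, C, out_h, out_w), dtype=x.dtype)
    for i in range(out_h):
        hs, he = math.floor(i * H / out_h), math.ceil((i + 1) * H / out_h)
        for j in range(out_w):
            ws, we = math.floor(j * W / out_w), math.ceil((j + 1) * W / out_w)
            # average over the adaptive window for output cell (i, j)
            out[:, :, i, j] = x[:, :, hs:he, ws:we].mean(axis=(-2, -1))
    return out

# adaptive_avg_pool2d_ref(np.random.rand(2, 3, 32, 32), 3, 3).shape == (2, 3, 3, 3)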
# avg pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
AvgPool3d = nn.AvgPool3d(kernel_size=2,
stride=2, padding=0)
output = AvgPool3d(input)
# output.shape [1, 2, 3, 16, 16]
class AdaptiveAvgPool3d(layers.Layer):
"""
def __init__(self,
kernel_size,
stride,
padding=0,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCDHW",
name=None):
super(AvgPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format
This operation applies 3D adaptive avg pooling on the input tensor. The d, h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i, j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The target output size. If it is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either an int, or None, which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
the order of: [batch_size, input_channels, input_depth, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool3d.
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data is in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n]; adaptive pooling evenly divides the D, H and W dimensions
# of the input into l * m * n grids and performs average pooling in each
# grid to get the output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCDHW", name=None):
super(AdaptiveAvgPool3d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool3d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
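# A usage sketch based on the output_size description above: entries set to
# None keep the corresponding input dimension unchanged. It assumes the same
# dygraph-style API as the docstring examples in this file.
import numpy as np
import paddle

paddle.disable_static()
x = paddle.to_tensor(np.random.rand(2, 3, 8, 32, 32).astype("float32"))
pool = paddle.nn.AdaptiveAvgPool3d(output_size=(None, 4, 4))
y = pool(x)
# y.shape is [2, 3, 8, 4, 4]: depth kept at 8, H and W pooled down to 4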
class AdaptiveMaxPool1d(layers.Layer):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])
Args:
output_size (int|list|tuple): The target output size. If it is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Returns:
None.
Raises:
ValueError: If 'pool_size' is not an integer or a list/tuple of length 1.
Shape:
x (Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type is same as input x.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data is in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m]; adaptive pooling evenly divides the L dimension
# of the input into m grids and performs max pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
pool_out = AdaptiveMaxPool1d(data)
# pool_out shape: [1, 3, 16]
# for return_indices = True
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True)
pool_out, indices = AdaptiveMaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool1d, self).__init__()
self.output_size = output_size
self.return_indices = return_indices
self.name = name
def forward(self, input):
return F.adaptive_max_pool1d(input, self.output_size,
self.return_indices, self.name)
class AdaptiveMaxPool2d(layers.Layer):
"""
This operation applies 2D adaptive max pooling on the input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and regular pooling is that adaptive pooling fixes the output size rather than the kernel size.
For adaptive max pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i, j) &= max(Input[hstart:hend, wstart:wend])
Parameters:
output_size (int|list|tuple): The target output size. If it is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None, which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Shape:
x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveMaxPool2d.
Examples:
.. code-block:: python
# adaptive max pool2d
# suppose input data is in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n]; adaptive pooling evenly divides the H and W dimensions
# of the input into m * n grids and performs max pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
#     for j in range(n):
#         hstart = floor(i * H / m)
#         hend = ceil((i + 1) * H / m)
#         wstart = floor(j * W / n)
#         wend = ceil((j + 1) * W / n)
#         output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_indices=True)
pool_out, indices = adaptive_max_pool(x = x)
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool2d, self).__init__()
self._output_size = output_size
self._return_indices = return_indices
self._name = name
def forward(self, x):
    return F.avg_pool3d(
        x,
        kernel_size=self.ksize,
        stride=self.stride,
        padding=self.padding,
        ceil_mode=self.ceil_mode,
        count_include_pad=self.count_include_pad,
        divisor_override=self.divisor,
        data_format=self.data_format,
        name=self.name)
def forward(self, x):
    return F.adaptive_max_pool2d(
        x,
        output_size=self._output_size,
        return_indices=self._return_indices,
        name=self._name)
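# A NumPy sketch of the adaptive max-pool grid math above that also returns
# an argmax position (flattened row-major inside the H*W plane); an
# illustrative reference only, not the Paddle kernel and not guaranteed to
# match its index convention.
import math
import numpy as np

def adaptive_max_pool2d_ref(x, out_h, out_w):
    # x: ndarray of shape [N, C, H, W]
    N, C, H, W = x.shape
    out = np.empty((N, C, out_h, out_w), dtype=x.dtype)
    idx = np.empty((N, C, out_h, out_w), dtype=np.int64)
    for i in range(out_h):
        hs, he = math.floor(i * H / out_h), math.ceil((i + 1) * H / out_h)
        for j in range(out_w):
            ws, we = math.floor(j * W / out_w), math.ceil((j + 1) * W / out_w)
            win = x[:, :, hs:he, ws:we].reshape(N, C, -1)
            local = win.argmax(axis=-1)  # position inside the current window
            out[:, :, i, j] = win.max(axis=-1)
            # map the window-local position back into the full H*W plane
            idx[:, :, i, j] = (hs + local // (we - ws)) * W + (ws + local % (we - ws))
    return out, idx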
class AdaptiveMaxPool3d(layers.Layer):
"""
This operation applies 3D adaptive max pooling on the input tensor. The d, h and w dimensions
of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and regular pooling is that adaptive pooling fixes the output size rather than the kernel size.
For adaptive max pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i, j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])
Parameters:
output_size (int|list|tuple): The target output size. If it is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either an int, or None, which means
the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name does not need to be set and
is None by default.
Shape:
x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveMaxPool3d.
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data is in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n]; adaptive pooling evenly divides the D, H and W dimensions
# of the input into l * m * n grids and performs max pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
pool = paddle.nn.AdaptiveMaxPool3d(output_size=4)
out = pool(x)
# out shape: [2, 3, 4, 4, 4]
pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True)
out, indices = pool(x)
# out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool3d, self).__init__()
self._output_size = output_size
self._return_indices = return_indices
self._name = name
def forward(self, x):
return F.adaptive_max_pool3d(
x,
output_size=self._output_size,
return_indices=self._return_indices,
name=self._name)
......@@ -26,9 +26,8 @@ __all__ = [
]
from ..fluid.optimizer import SGD, Momentum, Adagrad, Dpsgd, DecayedAdagrad, \
Ftrl, Adadelta, \
SGDOptimizer, MomentumOptimizer, AdagradOptimizer,DpsgdOptimizer,\
from ..fluid.optimizer import Momentum, Adagrad, Dpsgd, DecayedAdagrad, Ftrl,\
AdagradOptimizer,DpsgdOptimizer,\
DecayedAdagradOptimizer,FtrlOptimizer,AdadeltaOptimizer, \
ModelAverage, LarsMomentum, DGCMomentumOptimizer, LambOptimizer,\
ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, \
......@@ -39,6 +38,9 @@ from .adam import Adam
from .adamw import AdamW
from .adamax import Adamax
from .rmsprop import RMSProp
from .adadelta import Adadelta
from .sgd import SGD
from .momentum import Momentum
from . import lr_scheduler
from .lr_scheduler import _LRScheduler, NoamLR, PiecewiseLR, NaturalExpLR, InverseTimeLR, PolynomialLR, \
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
__all__ = ["Adadelta"]
class Adadelta(Optimizer):
"""
**Notes: This API does not support sparse parameter optimization.**
Adadelta Optimizer. Please refer to this for details:
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.
The update is done as follows:
.. math::
E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2
learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }
E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\_rate)^2
Args:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
epsilon (float): a small float number for numeric stability. Default 1.0e-6.
rho (float): a floating point value indicating the decay rate. Default 0.95.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
adadelta.step()
adadelta.clear_grad()
"""
_avg_squared_grad_acc_str = "_avg_squared_grad"
_avg_squared_update_acc_str = "_avg_squared_update"
def __init__(self,
learning_rate=0.001,
epsilon=1.0e-6,
rho=0.95,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set.")
if epsilon is None:
raise ValueError("epsilon is not set.")
if rho is None:
raise ValueError("rho is not set.")
super(Adadelta, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "adadelta"
self._epsilon = epsilon
self._rho = rho
def _create_accumulators(self, block, parameters):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
for p in parameters:
self._add_accumulator(self._avg_squared_grad_acc_str, p)
self._add_accumulator(self._avg_squared_update_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
avg_squared_grad_acc = self._get_accumulator(
self._avg_squared_grad_acc_str, param_and_grad[0])
avg_squared_update_acc = self._get_accumulator(
self._avg_squared_update_acc_str, param_and_grad[0])
# Create the adadelta optimizer op
adadelta_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"AvgSquaredGrad": avg_squared_grad_acc,
"AvgSquaredUpdate": avg_squared_update_acc
},
outputs={
"ParamOut": param_and_grad[0],
"AvgSquaredGradOut": avg_squared_grad_acc,
"AvgSquaredUpdateOut": avg_squared_update_acc
},
attrs={"epsilon": self._epsilon,
"rho": self._rho},
stop_gradient=True)
return adadelta_op
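# A NumPy sketch of one Adadelta step following the update equations in the
# docstring above (rho and epsilon as defined there); it mirrors the formulas,
# not the exact C++ adadelta kernel.
import numpy as np

def adadelta_step(param, grad, avg_sq_grad, avg_sq_update, rho=0.95, epsilon=1.0e-6):
    avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad ** 2
    lr = np.sqrt((avg_sq_update + epsilon) / (avg_sq_grad + epsilon))
    update = -lr * grad
    avg_sq_update = rho * avg_sq_update + (1 - rho) * update ** 2
    return param + update, avg_sq_grad, avg_sq_update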
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
__all__ = ["Momentum"]
class Momentum(Optimizer):
"""
Simple Momentum optimizer with velocity state
This optimizer has a flag for Nesterov momentum.
The update equations are as follows:
.. math::
& velocity = mu * velocity + gradient
& if (use\_nesterov):
&\quad param = param - (gradient + mu * velocity) * learning\_rate
& else:
&\quad param = param - learning\_rate * velocity
Parameters:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
momentum (float): Momentum factor. The default value is 0.9.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
momentum.step()
momentum.clear_grad()
"""
_velocity_acc_str = "velocity"
def __init__(self,
learning_rate=0.001,
momentum=0.9,
parameters=None,
use_nesterov=False,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set")
if momentum is None:
raise ValueError("momentum is not set")
super(Momentum, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "momentum"
self._momentum = momentum
self._use_nesterov = bool(use_nesterov)
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
for p in parameters:
self._add_accumulator(self._velocity_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
velocity_acc = self._get_accumulator(self._velocity_acc_str,
param_and_grad[0])
lr = self._create_param_lr(param_and_grad)
if framework.in_dygraph_mode():
_, _ = core.ops.momentum(param_and_grad[0], param_and_grad[1],
velocity_acc, lr, param_and_grad[0],
velocity_acc, 'mu', self._momentum,
'use_nesterov', self._use_nesterov)
return None
attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov}
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
"Velocity": [velocity_acc],
"LearningRate": [lr]
}
outputs = {
"ParamOut": [param_and_grad[0]],
"VelocityOut": [velocity_acc]
}
# create the momentum optimize op
momentum_op = block.append_op(
type=self.type,
inputs=inputs,
outputs=outputs,
attrs=attrs,
stop_gradient=True)
return momentum_op
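# A NumPy-friendly sketch of the momentum update equations in the docstring
# above, including the Nesterov variant; a reference for the formulas, not the
# fused core.ops.momentum kernel.
def momentum_step(param, grad, velocity, lr, mu=0.9, use_nesterov=False):
    velocity = mu * velocity + grad
    if use_nesterov:
        param = param - (grad + mu * velocity) * lr
    else:
        param = param - lr * velocity
    return param, velocity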
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
from ..fluid.dygraph import no_grad
__all__ = ["SGD"]
class SGD(Optimizer):
"""
Optimizer of the stochastic gradient descent algorithm.
.. math::
param\_out = param - learning\_rate * grad
Parameters:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase``. There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
sgd.step()
sgd.clear_grad()
"""
def __init__(self,
learning_rate=0.001,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set")
super(SGD, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "sgd"
@no_grad()
def _append_optimize_op(self, block, param_and_grad):
lr = self._create_param_lr(param_and_grad)
if framework.in_dygraph_mode():
core.ops.sgd(param_and_grad[0], lr, param_and_grad[1],
param_and_grad[0])
return None
assert isinstance(block, framework.Block)
# create the optimize op
sgd_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": lr
},
outputs={"ParamOut": param_and_grad[0]},
stop_gradient=True)
return sgd_op
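# The SGD rule documented above, written out as a one-line reference:
# param_out = param - learning_rate * grad.
def sgd_step(param, grad, learning_rate=0.001):
    return param - learning_rate * grad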
......@@ -1618,6 +1618,10 @@ def clip(x, min=None, max=None, name=None):
fmax = float(np.finfo(np_dtype).max)
if in_dygraph_mode():
if isinstance(min, Variable):
min = min.numpy().item(0)
if isinstance(max, Variable):
max = max.numpy().item(0)
min = fmin if min is None else min
max = fmax if max is None else max
return core.ops.clip(x, "min", min, "max", max)
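# A NumPy sketch of the scalar handling above: tensor-valued min/max are
# reduced to Python scalars, and a missing bound falls back to the dtype's
# finfo limits before clipping.
import numpy as np

def clip_ref(x, min=None, max=None):
    info = np.finfo(x.dtype)
    lo = info.min if min is None else float(np.asarray(min).item())
    hi = info.max if max is None else float(np.asarray(max).item())
    return np.clip(x, lo, hi)

# clip_ref(np.array([-3.0, 0.5, 7.0], dtype=np.float32), min=-1.0, max=1.0)
# -> [-1.0, 0.5, 1.0]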
......
......@@ -94,7 +94,7 @@ def bernoulli(x, name=None):
return out
def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None):
def gaussian_random(shape, mean=0.0, std=1.0, dtype=None, name=None):
"""
This OP returns a Tensor filled with random values sampled from a Gaussian
distribution, with ``shape`` and ``dtype``.
......@@ -109,9 +109,10 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None):
std(float|int, optional): Standard deviation of the output tensor, default
is 1.0.
seed(int, optional): ${seed_comment}
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of
the output Tensor. Supported data types: float32, float64.
Default is float32.
dtype(str|np.dtype, optional): The data type of the output Tensor.
Supported data types: float32, float64.
Default is None, use global default dtype (see ``get_default_dtype``
for details).
name(str, optional): The default value is None. Normally there is no
need for user to set this property. For more information, please
refer to :ref:`api_guide_Name`.
......@@ -120,6 +121,13 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None):
Tensor: A Tensor filled with random values sampled from a Gaussian
distribution, with ``shape`` and ``dtype``.
"""
if dtype is None:
dtype = paddle.framework.get_default_dtype()
if dtype not in ['float32', 'float64']:
raise TypeError(
"gaussian_random only supports [float32, float64], but the default dtype is %s"
% dtype)
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
seed = 0
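# A hedged sketch of the default-dtype resolution pattern used above by
# gaussian_random (and by standard_normal / uniform / rand below): fall back
# to the global default dtype and reject anything other than float32/float64.
import paddle

def resolve_float_dtype(dtype=None):
    if dtype is None:
        dtype = paddle.framework.get_default_dtype()
    if dtype not in ['float32', 'float64']:
        raise TypeError(
            "only float32 and float64 are supported, but got %s" % dtype)
    return dtype

# resolve_float_dtype() -> 'float32' unless the global default dtype was changed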
......@@ -169,9 +177,10 @@ def standard_normal(shape, dtype=None, name=None):
(with the shape [1], and the data type int32 or int64). If ``shape``
is a Tensor, it should be a 1-D Tensor(with the data type int32 or
int64).
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
output tensor. Supported data types: float32, float64. If ``dtype``
is None, the data type is float32. Default is None.
dtype(str|np.dtype, optional): The data type of the output Tensor.
Supported data types: float32, float64.
Default is None, use global default dtype (see ``get_default_dtype``
for details).
name (str, optional): Name for the operation (optional, default is None).
For more information, please refer to :ref:`api_guide_Name`.
......@@ -216,7 +225,11 @@ def standard_normal(shape, dtype=None, name=None):
"""
if dtype is None:
dtype = 'float32'
dtype = paddle.framework.get_default_dtype()
if dtype not in ['float32', 'float64']:
raise TypeError(
"standard_normal only supports [float32, float64], but the default dtype is %s"
% dtype)
return gaussian_random(
shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name)
......@@ -325,7 +338,7 @@ def normal(mean=0.0, std=1.0, shape=None, name=None):
return out
def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None):
"""
This OP returns a Tensor filled with random values sampled from a uniform
distribution in the range [``min``, ``max``), with ``shape`` and ``dtype``.
......@@ -343,9 +356,10 @@ def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
(with the shape [1], and the data type int32 or int64). If ``shape``
is a Tensor, it should be a 1-D Tensor(with the data type int32 or
int64).
dtype(str|np.dtype, optional): The data type of
the output Tensor. Supported data types: float32, float64.
Default is float32.
dtype(str|np.dtype, optional): The data type of the output Tensor.
Supported data types: float32, float64.
Default is None, use global default dtype (see ``get_default_dtype``
for details).
min(float|int, optional): The lower bound on the range of random values
to generate, ``min`` is included in the range. Default is -1.0.
max(float|int, optional): The upper bound on the range of random values
......@@ -401,6 +415,13 @@ def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
"""
if dtype is None:
dtype = paddle.framework.get_default_dtype()
if dtype not in ['float32', 'float64']:
raise TypeError(
"uniform only supports [float32, float64], but the default dtype is %s"
% dtype)
if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype)
......@@ -447,7 +468,7 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None):
(with the shape [1], and the data type int32 or int64). If ``shape``
is a Tensor, it should be a 1-D Tensor(with the data type int32 or
int64). Default is [1].
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
dtype(str|np.dtype, optional): The data type of the
output tensor. Supported data types: int32, int64. If ``dtype``
is None, the data type is int64. Default is None.
name(str, optional): The default value is None. Normally there is no
......@@ -550,7 +571,7 @@ def randperm(n, dtype="int64", name=None):
Args:
n(int): The upper bound (exclusive), and it should be greater than 0.
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of
dtype(str|np.dtype, optional): The data type of
the output Tensor. Supported data types: int32, int64, float32,
float64. Default is int64.
name(str, optional): The default value is None. Normally there is no
......@@ -622,9 +643,10 @@ def rand(shape, dtype=None, name=None):
(with the shape [1], and the data type int32 or int64). If ``shape``
is a Tensor, it should be a 1-D Tensor(with the data type int32 or
int64).
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
output tensor. Supported data types: float32, float64. If ``dtype``
is None, the data type is float32. Default is None.
dtype(str|np.dtype, optional): The data type of the output Tensor.
Supported data types: float32, float64.
Default is None, use global default dtype (see ``get_default_dtype``
for details).
name(str, optional): The default value is None. Normally there is no
need for user to set this property. For more information, please
refer to :ref:`api_guide_Name`.
......@@ -668,7 +690,11 @@ def rand(shape, dtype=None, name=None):
"""
if dtype is None:
dtype = 'float32'
dtype = paddle.framework.get_default_dtype()
if dtype not in ['float32', 'float64']:
raise TypeError(
"rand only supports [float32, float64], but the default dtype is %s"
% dtype)
out = uniform(shape, dtype, min=0.0, max=1.0, name=name)
out.stop_gradient = True
......
......@@ -55,7 +55,7 @@ def get_os_info():
else:
plat = None
ver = None
envs['os_info'] = "{} {}".format(plat, ver)
envs['os_info'] = "{0} {1}".format(plat, ver)
def get_python_info():
......@@ -93,7 +93,7 @@ def get_cudnn_info():
if cudnn_dll_path:
cudnn_header_path = cudnn_dll_path.split('bin')[
0] + 'include\cudnn.h'
cmd = 'type "{}" | findstr "{}" | findstr /v "CUDNN_VERSION"'
cmd = 'type "{0}" | findstr "{1}" | findstr /v "CUDNN_VERSION"'
else:
envs['cudnn_version'] = None
return
......@@ -102,7 +102,7 @@ def get_cudnn_info():
'whereis "cudnn.h" | awk \'{print $2}\'')
if cudnn_header_path:
cudnn_header_path = cudnn_header_path.strip()
cmd = 'cat "{}" | grep "{}" | grep -v "CUDNN_VERSION"'
cmd = 'cat "{0}" | grep "{1}" | grep -v "CUDNN_VERSION"'
else:
envs['cudnn_version'] = None
return
......@@ -112,7 +112,7 @@ def get_cudnn_info():
patch_level = _get_cudnn_ver(
cmd.format(cudnn_header_path, 'CUDNN_PATCHLEVEL'))
envs['cudnn_version'] = "{}.{}.{}".format(major, minor, patch_level)
envs['cudnn_version'] = "{0}.{1}.{2}".format(major, minor, patch_level)
def get_driver_info():
......@@ -132,7 +132,7 @@ def main():
get_cuda_info()
get_cudnn_info()
get_driver_info()
print(envs_template.format(**envs))
print('*' * 40 + envs_template.format(**envs) + '*' * 40)
if __name__ == '__main__':
......