Commit fef6f6f9 authored by: S seiriosPlus

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into optimize/large_scale_kv_spped

...@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
#include <cmath>
#include <functional>
#include <string>
#include <vector>
...@@ -74,12 +75,17 @@ void recompute_bias_and_weights(const Scope* scope, ir::Node* conv_weight,
auto* weights = scope->FindVar(conv_weight->Name())->GetMutable<LoDTensor>();
auto weights_shape = weights->dims();
auto weights_shape_2d = flatten_to_2d(weights_shape, 1);
auto* weights_data = weights->mutable_data<float>(platform::CPUPlace());
EigenMatrixArrayMap weights_array_2d(
    weights->mutable_data<float>(platform::CPUPlace()), weights_shape_2d[0],
EigenMatrixArrayMap weights_array_2d(weights_data, weights_shape_2d[0],
                                     weights_shape_2d[1]);
weights_array_2d.colwise() *= scale_array;
// Check for subnormal values that slow down convolution execution
for (int i = 0; i < weights->numel(); ++i) {
if (std::fpclassify(weights_data[i]) == FP_SUBNORMAL) weights_data[i] = 0;
}
}
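Note: the loop added above flushes subnormal (denormal) weight values to zero, since denormals slow down CPU convolution kernels. A minimal standalone sketch of the same std::fpclassify check, with illustrative values that are not taken from this pass:

#include <cfloat>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  // Two of these values are subnormal (smaller than FLT_MIN).
  std::vector<float> weights = {1.0f, FLT_MIN / 4.0f, -2.5f, 1e-42f};
  for (float& w : weights) {
    if (std::fpclassify(w) == FP_SUBNORMAL) w = 0.0f;  // flush to zero
  }
  for (float w : weights) std::printf("%g\n", w);  // prints 1, 0, -2.5, 0
  return 0;
}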
void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
...@@ -108,13 +114,6 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
GET_CONV_BN_NODES(conv_ac_pattern);
// check if fuse can be done and if MKL-DNN should be used
FuseOptions fuse_option = FindFuseOption(*conv, *affine_channel);
if (fuse_option == DO_NOT_FUSE) {
VLOG(3) << "do not perform conv+affinechannel fuse";
return;
}
// Create eltwise_y (conv bias) variable
VarDesc eltwise_y_in_desc(
    patterns::PDNodeName(name_scope_, "eltwise_y_in"));
...@@ -143,6 +142,7 @@ void ConvAffineChannelFusePass::ApplyImpl(ir::Graph* graph) const {
desc.SetOutput("Out", std::vector<std::string>({ac_out->Name()}));
desc.SetType("elementwise_add");
desc.SetAttr("axis", 1);
desc.SetAttr("use_mkldnn", conv->Op()->GetAttrIfExists<bool>("use_mkldnn"));
auto eltwise_op = g->CreateOpNode(&desc);  // OpDesc will be copied.
GraphSafeRemoveNodes(graph, {ac_scale, ac_bias, affine_channel});
......
...@@ -15,7 +15,6 @@
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_pass_builder.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/gpu_info.h"
...@@ -103,8 +102,8 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// params_file_ fields.
CP_MEMBER(opt_cache_dir_);
prog_file_ = std::move(other.prog_file_);
CP_MEMBER(prog_file_);
params_file_ = std::move(other.params_file_);
CP_MEMBER(params_file_);
CP_MEMBER(use_fc_padding_);
// GPU related.
......
...@@ -32,7 +32,6 @@
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_pass.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/memory/memcpy.h"
...@@ -517,6 +516,8 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    AnalysisConfig, PaddleEngineKind::kAnalysis>(const AnalysisConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
if (config.glog_info_disabled()) {
  FLAGS_logtostderr = 1;
  FLAGS_minloglevel = 2;  // GLOG_ERROR
...@@ -1058,3 +1059,122 @@ USE_TRT_CONVERTER(skip_layernorm);
USE_TRT_CONVERTER(slice);
USE_TRT_CONVERTER(scale);
#endif
namespace paddle_infer {
void Tensor::Reshape(const std::vector<int> &shape) { tensor_->Reshape(shape); }
std::vector<int> Tensor::shape() const { return tensor_->shape(); }
void Tensor::SetLoD(const std::vector<std::vector<size_t>> &x) {
return tensor_->SetLoD(x);
}
std::vector<std::vector<size_t>> Tensor::lod() const { return tensor_->lod(); }
const std::string &Tensor::name() const { return tensor_->name(); }
DataType Tensor::type() const { return tensor_->type(); }
Predictor::Predictor(const Config &config) {
const_cast<Config *>(&config)->SwitchUseFeedFetchOps(false);
// The second parameter indicates that the discard log is not printed
predictor_ = paddle::CreatePaddlePredictor<
Config, paddle::PaddleEngineKind::kAnalysis>(config);
}
std::vector<std::string> Predictor::GetInputNames() {
return predictor_->GetInputNames();
}
std::unique_ptr<Tensor> Predictor::GetInputHandle(const std::string &name) {
auto zero_copy_tensor = predictor_->GetInputTensor(name);
std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
return tensor;
}
std::vector<std::string> Predictor::GetOutputNames() {
return predictor_->GetOutputNames();
}
std::unique_ptr<Tensor> Predictor::GetOutputHandle(const std::string &name) {
auto zero_copy_tensor = predictor_->GetOutputTensor(name);
std::unique_ptr<Tensor> tensor(new Tensor(std::move(zero_copy_tensor)));
return tensor;
}
bool Predictor::Run() { return predictor_->ZeroCopyRun(); }
std::unique_ptr<Predictor> Predictor::Clone() {
auto analysis_pred = predictor_->Clone();
std::unique_ptr<Predictor> pred(new Predictor(std::move(analysis_pred)));
return pred;
}
void Predictor::ClearIntermediateTensor() {
predictor_->ClearIntermediateTensor();
}
int GetNumBytesOfDataType(DataType dtype) {
switch (dtype) {
case DataType::FLOAT32:
return sizeof(float);
case DataType::INT64:
return sizeof(int64_t);
case DataType::INT32:
return sizeof(int32_t);
case DataType::UINT8:
return sizeof(uint8_t);
default:
assert(false);
return -1;
}
}
std::string GetVersion() { return paddle::get_version(); }
std::string UpdateDllFlag(const char *name, const char *value) {
return paddle::UpdateDllFlag(name, value);
}
} // namespace paddle_infer
namespace paddle_infer {
std::shared_ptr<Predictor> CreatePredictor(const Config &config) { // NOLINT
std::shared_ptr<Predictor> predictor(new Predictor(config));
return predictor;
}
namespace services {
PredictorPool::PredictorPool(const Config &config, size_t size) {
PADDLE_ENFORCE_GE(
    size, 1UL,
    paddle::platform::errors::InvalidArgument(
        "The predictor pool size should be no less than 1, but it's (%d)",
        size));
Config copy_config(config);
main_pred_.reset(new Predictor(config));
for (size_t i = 0; i < size - 1; i++) {
if (config.tensorrt_engine_enabled()) {
Config config_tmp(copy_config);
preds_.push_back(
std::move(std::unique_ptr<Predictor>(new Predictor(config_tmp))));
} else {
preds_.push_back(std::move(main_pred_->Clone()));
}
}
}
Predictor *PredictorPool::Retrive(size_t idx) {
PADDLE_ENFORCE_LT(
    idx, preds_.size() + 1,
    paddle::platform::errors::InvalidArgument(
        "There are (%d) predictors in the pool, but the idx is (%d)",
        preds_.size() + 1, idx));
if (idx == 0) {
return main_pred_.get();
}
return preds_[idx - 1].get();
}
} // namespace services
} // namespace paddle_infer
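Note: a minimal usage sketch of the paddle_infer API defined above (Config, CreatePredictor, input/output handles). The model paths and input shape are hypothetical; the calls mirror the tests added later in this commit:

#include <functional>
#include <numeric>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void RunOnce() {
  paddle_infer::Config config;
  config.SetModel("./mobilenet/model", "./mobilenet/params");  // hypothetical paths
  auto predictor = paddle_infer::CreatePredictor(config);

  // Feed a constant input tensor.
  std::vector<int> in_shape = {1, 3, 224, 224};
  std::vector<float> input(1 * 3 * 224 * 224, 1.0f);
  auto input_t = predictor->GetInputHandle(predictor->GetInputNames()[0]);
  input_t->Reshape(in_shape);
  input_t->CopyFromCpu(input.data());

  predictor->Run();

  // Copy the first output back to host memory.
  auto output_t = predictor->GetOutputHandle(predictor->GetOutputNames()[0]);
  auto out_shape = output_t->shape();
  int out_num = std::accumulate(out_shape.begin(), out_shape.end(), 1,
                                std::multiplies<int>());
  std::vector<float> out_data(out_num);
  output_t->CopyToCpu(out_data.data());
}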
...@@ -112,6 +112,12 @@ void PaddleBuf::Free() {
}
}
NativeConfig::NativeConfig() {
  LOG(WARNING) << "The paddle::NativeConfig interface is going to be "
                  "deprecated in the next release, please use the latest "
                  "paddle_infer::Config instead.";
}
std::string get_version() {
  std::stringstream ss;
  ss << "version: " << framework::paddle_version() << "\n";
......
...@@ -15,6 +15,7 @@ limitations under the License. */
#include <glog/logging.h>
#include <algorithm>
#include <map>
#include <memory>
#include <set>
#include <sstream>
#include <string>
...@@ -25,6 +26,7 @@ limitations under the License. */
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"
...@@ -311,6 +313,8 @@ bool NativePaddlePredictor::GetFetch(std::vector<PaddleTensor> *outputs,
template <>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
    NativeConfig, PaddleEngineKind::kNative>(const NativeConfig &config) {
// TODO(NHZlX): Should add the link to the doc of
// paddle_infer::CreatePredictor<paddle_infer::Config>
VLOG(3) << "create NativePaddlePredictor";
if (config.use_gpu) {
  // 1. GPU memory
......
...@@ -347,6 +347,7 @@ class PD_INFER_DECL PaddlePredictor {
/// place of inference, etc.)
///
struct PD_INFER_DECL NativeConfig : public PaddlePredictor::Config {
  NativeConfig();
  /// GPU related fields.
  bool use_gpu{false};
  int device{0};
...@@ -421,7 +422,8 @@ enum class PaddleEngineKind {
};
template <typename ConfigT, PaddleEngineKind engine>
std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(const ConfigT& config);
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor(
    const ConfigT& config);
template <>
PD_INFER_DECL std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
...@@ -437,6 +439,4 @@ PD_INFER_DECL std::string get_version();
PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
PD_INFER_DECL std::shared_ptr<framework::Cipher> MakeCipher(
    const std::string& config_file);
}  // namespace paddle
...@@ -22,9 +22,124 @@ limitations under the License. */
#pragma once
#include <cassert>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle_analysis_config.h"  // NOLINT
#include "paddle_api.h"              // NOLINT
namespace paddle_infer {
using DataType = paddle::PaddleDType;
using PlaceType = paddle::PaddlePlace;
using PrecisionType = paddle::AnalysisConfig::Precision;
using Config = paddle::AnalysisConfig;
class PD_INFER_DECL Tensor {
public:
// Can only be created by predictor->GetInputHandle(const std::string& name)
// or predictor->GetOutputHandle(const std::string& name)
Tensor() = delete;
explicit Tensor(std::unique_ptr<paddle::ZeroCopyTensor>&& tensor)
: tensor_(std::move(tensor)) {}
void Reshape(const std::vector<int>& shape);
template <typename T>
void CopyFromCpu(const T* data);
// should add the place
template <typename T>
T* mutable_data(PlaceType place);
template <typename T>
void CopyToCpu(T* data);
template <typename T>
T* data(PlaceType* place, int* size) const;
void SetLoD(const std::vector<std::vector<size_t>>& x);
std::vector<std::vector<size_t>> lod() const;
DataType type() const;
std::vector<int> shape() const;
const std::string& name() const;
private:
std::unique_ptr<paddle::ZeroCopyTensor> tensor_;
};
class PD_INFER_DECL Predictor {
public:
Predictor() = default;
~Predictor() {}
// Use for clone
explicit Predictor(std::unique_ptr<paddle::PaddlePredictor>&& pred)
: predictor_(std::move(pred)) {}
explicit Predictor(const Config& config);
std::vector<std::string> GetInputNames();
std::unique_ptr<Tensor> GetInputHandle(const std::string& name);
bool Run();
std::vector<std::string> GetOutputNames();
std::unique_ptr<Tensor> GetOutputHandle(const std::string& name);
std::unique_ptr<Predictor> Clone();
void ClearIntermediateTensor();
private:
std::unique_ptr<paddle::PaddlePredictor> predictor_;
};
PD_INFER_DECL std::shared_ptr<Predictor> CreatePredictor(
const Config& config); // NOLINT
PD_INFER_DECL int GetNumBytesOfDataType(DataType dtype);
PD_INFER_DECL std::string GetVersion();
PD_INFER_DECL std::string UpdateDllFlag(const char* name, const char* value);
template <typename T>
void Tensor::CopyFromCpu(const T* data) {
tensor_->copy_from_cpu<T>(data);
}
template <typename T>
void Tensor::CopyToCpu(T* data) {
return tensor_->copy_to_cpu<T>(data);
}
template <typename T>
T* Tensor::mutable_data(PlaceType place) {
return tensor_->mutable_data<T>(place);
}
template <typename T>
T* Tensor::data(PlaceType* place, int* size) const {
return tensor_->data<T>(place, size);
}
} // namespace paddle_infer
namespace paddle_infer {
namespace services {
class PD_INFER_DECL PredictorPool {
public:
PredictorPool() = delete;
PredictorPool(const PredictorPool&) = delete;
PredictorPool& operator=(const PredictorPool&) = delete;
explicit PredictorPool(const Config& config, size_t size = 1);
Predictor* Retrive(size_t idx);
private:
std::shared_ptr<Predictor> main_pred_;
std::vector<std::unique_ptr<Predictor>> preds_;
};
} // namespace services
} // namespace paddle_infer
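Note: a minimal sketch of the services::PredictorPool declared above, which keeps one main predictor (index 0) plus size-1 additional predictors. The model path, pool size, and thread usage are illustrative only:

#include <thread>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"

void ServeWithPool() {
  paddle_infer::Config config;
  config.SetModel("./resnet50/model", "./resnet50/params");  // hypothetical paths
  config.EnableUseGpu(100, 0);

  // Four predictors: index 0 is the main one, 1..3 are clones (or TensorRT
  // re-creations when the TensorRT engine is enabled).
  paddle_infer::services::PredictorPool pool(config, 4);

  std::vector<std::thread> workers;
  for (size_t i = 0; i < 4; ++i) {
    workers.emplace_back([&pool, i] {
      paddle_infer::Predictor* pred = pool.Retrive(i);
      // ... reshape inputs, CopyFromCpu, and pred->Run() as in the sketch above.
      (void)pred;
    });
  }
  for (auto& t : workers) t.join();
}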
...@@ -188,6 +188,8 @@ void CpuPassStrategy::EnableMKLDNN() {
"depthwise_conv_mkldnn_pass",    //
"conv_bn_fuse_pass",             // Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass",  // preserve correct pass order
"conv_affine_channel_fuse_pass",             //
"conv_eltwiseadd_affine_channel_fuse_pass",  //
"conv_transpose_bn_fuse_pass",               //
"conv_transpose_eltwiseadd_bn_fuse_pass",    //
"conv_bias_mkldnn_fuse_pass",                //
......
...@@ -54,7 +54,7 @@ class SkipLayerNormPluginDynamic : public DynamicPluginTensorRT {
auto ptr = new SkipLayerNormPluginDynamic(
    bias_.data(), scale_.data(), bias_size_, scale_size_, eps_, ban_fp16_);
ptr->bias_gpu_ = bias_gpu_;
ptr->scale_gpu_ = bias_gpu_;
ptr->scale_gpu_ = scale_gpu_;
return ptr;
}
......
...@@ -515,3 +515,9 @@ if(WITH_MKLDNN)
inference_analysis_test(test_analyzer_capi_ner SRCS analyzer_capi_ner_tester.cc
        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} paddle_fluid_c
        ARGS --infer_model=${CHINESE_NER_INSTALL_DIR}/model)
if(WITH_GPU)
  inference_analysis_test(paddle_infer_api_test SRCS paddle_infer_api_test.cc
        EXTRA_DEPS ${INFERENCE_EXTRA_DEPS}
        ARGS --infer_model=${RESNET50_MODEL_DIR})
endif()
...@@ -72,3 +72,59 @@ TEST(AnalysisPredictor, use_gpu) {
}  // namespace inference
}  // namespace paddle
namespace paddle_infer {
TEST(Predictor, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "model";
Config config;
config.EnableUseGpu(100, 0);
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableLiteEngine(PrecisionType::kFloat32);
auto predictor = CreatePredictor(config);
const int batch = 1;
const int channel = 3;
const int height = 318;
const int width = 318;
const int input_num = batch * channel * height * width;
std::vector<float> input(input_num, 1);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape({1, 3, 318, 318});
input_t->CopyFromCpu(input.data());
predictor->Run();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
size_t out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
std::vector<float> out_data;
out_data.resize(out_num);
output_t->CopyToCpu(out_data.data());
const std::vector<float> truth_values = {
127.780396f, 738.16656f, 1013.2264f, -438.17206f, 366.4022f,
927.66187f, 736.2241f, -633.68567f, -329.92737f, -430.15637f,
-633.0639f, -146.54858f, -1324.2804f, -1349.3661f, -242.67671f,
117.44864f, -801.7251f, -391.51495f, -404.8202f, 454.16132f,
515.48206f, -133.03114f, 69.293076f, 590.09753f, -1434.6917f,
-1070.8903f, 307.0744f, 400.52573f, -316.12177f, -587.1265f,
-161.05742f, 800.3663f, -96.47157f, 748.708f, 868.17645f,
-447.9403f, 112.73656f, 1127.1992f, 47.43518f, 677.7219f,
593.1881f, -336.4011f, 551.3634f, 397.82474f, 78.39835f,
-715.4006f, 405.96988f, 404.25684f, 246.01978f, -8.430191f,
131.36617f, -648.0528f};
float* data_o = out_data.data();
for (size_t j = 0; j < out_num; j += 10) {
EXPECT_NEAR((data_o[j] - truth_values[j / 10]) / truth_values[j / 10], 0.,
10e-5);
}
}
} // namespace paddle_infer
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda_runtime.h>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <cstring>
#include <numeric>
#include "paddle/fluid/inference/tests/api/trt_test_helper.h"
namespace paddle_infer {
TEST(Predictor, use_gpu) {
LOG(INFO) << GetVersion();
UpdateDllFlag("conv_workspace_size_limit", "4000");
std::string model_dir = FLAGS_infer_model + "/model";
Config config;
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableUseGpu(100, 0);
auto predictor = CreatePredictor(config);
auto pred_clone = predictor->Clone();
std::vector<int> in_shape = {1, 3, 318, 318};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
predictor->Run();
auto output_names = predictor->GetOutputNames();
auto output_t = predictor->GetOutputHandle(output_names[0]);
std::vector<int> output_shape = output_t->shape();
int out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1,
std::multiplies<int>());
std::vector<float> out_data;
out_data.resize(out_num);
output_t->CopyToCpu(out_data.data());
predictor->ClearIntermediateTensor();
}
TEST(PredictorPool, basic) {
LOG(INFO) << GetVersion();
UpdateDllFlag("conv_workspace_size_limit", "4000");
std::string model_dir = FLAGS_infer_model + "/model";
Config config;
config.SetModel(model_dir + "/model", model_dir + "/params");
config.EnableUseGpu(100, 0);
services::PredictorPool pred_pool(config, 4);
auto pred = pred_pool.Retrive(2);
std::vector<int> in_shape = {1, 3, 318, 318};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
auto in_names = pred->GetInputNames();
auto input_t = pred->GetInputHandle(in_names[0]);
input_t->name();
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
pred->Run();
auto out_names = pred->GetOutputNames();
auto output_t = pred->GetOutputHandle(out_names[0]);
auto out_type = output_t->type();
LOG(INFO) << GetNumBytesOfDataType(out_type);
if (out_type == DataType::FLOAT32) {
PlaceType place;
int size;
output_t->data<float>(&place, &size);
}
}
} // namespace paddle_infer
...@@ -41,7 +41,7 @@ TEST(AnalysisPredictor, use_gpu) {
SetFakeImageInput(&inputs_all, model_dir, false, "__model__", "");
std::vector<PaddleTensor> outputs;
for (auto& input : inputs_all) {
for (auto &input : inputs_all) {
  ASSERT_TRUE(predictor->Run(input, &outputs));
  predictor->ClearIntermediateTensor();
}
...@@ -49,3 +49,27 @@ TEST(AnalysisPredictor, use_gpu) {
}  // namespace inference
}  // namespace paddle
namespace paddle_infer {
TEST(PredictorPool, use_gpu) {
std::string model_dir = FLAGS_infer_model + "/" + "mobilenet";
Config config;
config.EnableUseGpu(100, 0);
config.SetModel(model_dir);
config.EnableTensorRtEngine();
services::PredictorPool pred_pool(config, 1);
auto predictor = pred_pool.Retrive(0);
auto input_names = predictor->GetInputNames();
auto input_t = predictor->GetInputHandle(input_names[0]);
std::vector<int> in_shape = {1, 3, 224, 224};
int in_num = std::accumulate(in_shape.begin(), in_shape.end(), 1,
[](int &a, int &b) { return a * b; });
std::vector<float> input(in_num, 0);
input_t->Reshape(in_shape);
input_t->CopyFromCpu(input.data());
predictor->Run();
}
} // namespace paddle_infer
...@@ -20,6 +20,7 @@ limitations under the License. */
#include <unordered_map>
#include <vector>
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/fluid/operators/mkldnn/mkldnn_activation_op.h"
#include "paddle/fluid/platform/port.h"
...@@ -1231,3 +1232,24 @@ REGISTER_OP_CPU_KERNEL(
ops::ActivationGradKernel<paddle::platform::CPUDeviceContext,
    ops::AbsGradFunctor<int64_t>>);
/* ========================================================================== */
/* ========================== register checkpoint ===========================*/
REGISTER_OP_VERSION(leaky_relu)
    .AddCheckpoint(
        R"ROC(fix leaky_relu, behavior changed when alpha < 0 or alpha > 1)ROC",
        paddle::framework::compatible::OpVersionDesc()
            .BugfixWithBehaviorChanged(
                "leaky_relu calculate formula before checkpoint: out = max(x, "
                "alpha * x); after checkpoint: out = x if x > 0 else alpha * "
                "x"));
REGISTER_OP_VERSION(hard_shrink)
    .AddCheckpoint(
        R"ROC(fix hard_shrink, behavior changed when threshold<0)ROC",
        paddle::framework::compatible::OpVersionDesc()
            .BugfixWithBehaviorChanged(
                "hard_shrink calculate formula before checkpoint: out = x * "
                "((x < -threshold) + (x > threshold)); after checkpoint: out = "
                "x * (((x < -threshold) + (x > threshold)) > 0)"));
/* ========================================================================== */
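Note: the two checkpoints above record behavior changes in the op formulas. A small numeric sketch of the leaky_relu difference, which only shows up when alpha < 0 or alpha > 1 (the values below are illustrative):

#include <algorithm>
#include <cstdio>

int main() {
  const float alpha = 2.0f;  // alpha > 1, where the two formulas disagree
  const float x = -1.0f;
  float before = std::max(x, alpha * x);  // old formula: max(-1, -2) = -1
  float after = x > 0 ? x : alpha * x;    // new formula: -2
  std::printf("before=%g after=%g\n", before, after);
  return 0;
}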
...@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/platform/cudnn_workspace_helper.h"
#ifdef PADDLE_WITH_MKLDNN
...@@ -567,3 +568,14 @@ REGISTER_OP_CPU_KERNEL(
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext, float>,
ops::GemmConvTransposeGradKernel<paddle::platform::CPUDeviceContext,
    double>);
REGISTER_OP_VERSION(conv_transpose)
.AddCheckpoint(
R"ROC(
Upgrade conv_transpose, add a new attribute [output_padding].
)ROC",
paddle::framework::compatible::OpVersionDesc().NewAttr(
"output_padding",
"In order to add additional size to one side of each dimension "
"in the output",
{}));
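Note: for context on the new output_padding attribute, it enlarges one side of each output dimension in the usual transposed-convolution output-size computation. A small sketch under the standard formula (dilation = 1; not taken from this diff):

#include <cstdio>

// Output length of one spatial dimension for conv_transpose.
int ConvTransposeOutSize(int in, int kernel, int stride, int padding,
                         int output_padding) {
  return (in - 1) * stride - 2 * padding + kernel + output_padding;
}

int main() {
  // Illustrative numbers: a length-7 feature map upsampled with stride 2.
  std::printf("%d\n", ConvTransposeOutSize(7, 3, 2, 1, 0));  // 13
  std::printf("%d\n", ConvTransposeOutSize(7, 3, 2, 1, 1));  // 14
  return 0;
}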
...@@ -56,7 +56,7 @@ endif()
cc_test(rpc_server_test SRCS rpc_server_test.cc
    DEPS ${RPC_DEPS} executor scope proto_desc lookup_sparse_table_read_op)
    DEPS ${RPC_DEPS} executor scope proto_desc lookup_sparse_table_read_op scale_op)
cc_test(varhandle_test SRCS varhandle_test.cc DEPS profiler scope)
cc_library(parameter_prefetch SRCS parameter_prefetch.cc DEPS sendrecvop_rpc memory)
cc_library(parameter_send SRCS parameter_send.cc DEPS sendrecvop_rpc memory)
......
...@@ -132,6 +132,15 @@ void ProcGetResponse(const VarHandle& var_h,
    &trainer_id);
}
void ProcGetRecvResponse(const VarHandle& var_h,
const ::grpc::ByteBuffer& ret_msg) {
VLOG(4) << "ProcGetRecvResponse";
framework::Variable* outvar = nullptr;
int trainer_id;
DeserializeRecvFromByteBuffer(ret_msg, *var_h.ctx(), var_h.scope(), &outvar,
&trainer_id);
}
template <typename T>
void RequestToByteBuffer(const T& proto, ::grpc::ByteBuffer* result) {
  ::grpc::Slice slice(proto.ByteSizeLong());
...@@ -482,6 +491,79 @@ VarHandlePtr GRPCClient::AsyncDistributeNotify(
  return h;
}
VarHandlePtr GRPCClient::AsyncSendAndRecv(const std::string& ep,
const platform::DeviceContext& ctx,
const framework::Scope& scope,
const std::string& send_var_name,
const std::string& recv_var_name,
const std::string& table_name,
int64_t time_out) {
const platform::DeviceContext* p_ctx = &ctx;
const std::string ep_val = ep;
const std::string send_var_name_val = send_var_name;
const std::string recv_var_name_val = recv_var_name;
const std::string table_name_val = table_name;
const framework::Scope* p_scope = &scope;
const auto ch = GetChannel(ep_val);
const std::string method = kSendAndRecvRPC;
VLOG(4) << "GRPCClient::SendAndRecv Begin ,Send_var_name: "
<< send_var_name_val << " Recv_var_name: " << recv_var_name_val;
int retry_times_ = 0;
while (true) {
SendAndRecvProcessor* s = new SendAndRecvProcessor(ch);
VarHandlePtr h(
new VarHandle(ep, method, send_var_name_val, p_ctx, p_scope));
VarHandlePtr h_recv(
new VarHandle(ep, method, recv_var_name_val, p_ctx, p_scope));
s->Prepare(h, time_out);
s->RecvPrepare(h_recv);
framework::AsyncIO([send_var_name_val, recv_var_name_val, table_name_val,
p_scope, p_ctx, s, method, h, this] {
auto* send_var = p_scope->FindVar(send_var_name_val);
send_var->GetMutable<framework::LoDTensor>()->set_lod({});
::grpc::ByteBuffer buf;
VLOG(4) << "SerializeToByteBuffer: send_var_name_val: "
<< send_var_name_val
<< " recv_var_name_val: " << recv_var_name_val;
SerializeToByteBuffer(send_var_name_val, send_var, *p_ctx, &buf,
recv_var_name_val, trainer_id_, table_name_val);
VLOG(3) << s->GetVarHandlePtr()->String() << " begin";
// stub context
s->response_call_back_ = ProcGetRecvResponse;
platform::RecordRPCEvent record_event(method);
auto call = s->stub_g_.PrepareUnaryCall(
s->context_.get(), "/sendrecv.SendRecvService/SendAndRecvVariable",
buf, &cq_);
call->StartCall();
call->Finish(&s->reply_, &s->status_, reinterpret_cast<void*>(s));
if (UNLIKELY(platform::IsProfileEnabled())) {
h->Wait();
}
});
req_count_++;
if (FLAGS_rpc_retry_times > 0 && retry_times_ < FLAGS_rpc_retry_times) {
h->Wait();
if (h->should_retry) {
VLOG(3) << "rpc call failed, retry times " << retry_times_;
retry_times_++;
std::random_device rd;
std::this_thread::sleep_for(std::chrono::milliseconds(rd() % 5));
continue;
}
}
return h;
}
}
bool GRPCClient::Wait() {
  std::unique_lock<std::mutex> lk(sync_mutex_);
  sync_cond_.wait(lk, [this] { return (req_count_ == 0 || ok_ == false); });
......
...@@ -53,6 +53,8 @@ namespace distributed {
void ProcGetResponse(const VarHandle& var_h, const grpc::ByteBuffer& msg);
void ProcGetRecvResponse(const VarHandle& var_h, const grpc::ByteBuffer& msg);
class BaseProcessor {
 public:
  BaseProcessor() { context_ = nullptr; }
...@@ -131,6 +133,28 @@ class GetProcessor : public BaseProcessor {
  RequestGetCallBack response_call_back_ = ProcGetResponse;
};
class SendAndRecvProcessor : public BaseProcessor {
public:
explicit SendAndRecvProcessor(std::shared_ptr<grpc::Channel> ch)
: BaseProcessor(), stub_g_(ch) {}
virtual ~SendAndRecvProcessor() {}
void ProcessImpl() override {
if (response_call_back_) {
response_call_back_(*var_h_recv_.get(), reply_);
var_h_recv_->Finish(true);
}
}
void RecvPrepare(VarHandlePtr h_recv) { var_h_recv_ = h_recv; }
::grpc::ByteBuffer reply_;
::grpc::GenericStub stub_g_;
RequestGetCallBack response_call_back_ = ProcGetResponse;
VarHandlePtr var_h_recv_;
};
class BatchBarrierProcessor : public BaseProcessor {
 public:
  explicit BatchBarrierProcessor(std::shared_ptr<grpc::Channel> ch)
...@@ -231,6 +255,14 @@ class GRPCClient : public RPCClient {
      const framework::Scope& scope, const std::string& var_name,
      int64_t time_out = FLAGS_rpc_deadline) override;
  VarHandlePtr AsyncSendAndRecv(const std::string& ep,
                                const platform::DeviceContext& ctx,
                                const framework::Scope& scope,
                                const std::string& send_var_name,
                                const std::string& recv_var_name,
                                const std::string& table_name = "",
                                int64_t time_out = FLAGS_rpc_deadline) override;
  VarHandlePtr AsyncSendComplete(
      const std::string& ep, int64_t time_out = FLAGS_rpc_deadline) override;
......
...@@ -76,7 +76,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
    PADDLE_THROW("Serialize does not support type: %s",
                 typeid(var->Type()).name());
  }
  std::string header;
  request.AppendToString(&header);
  auto buffer = std::unique_ptr<char[]>(new char[1024]);
...@@ -101,7 +100,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
  }
#endif
  PADDLE_ENFORCE_NOT_NULL(payload);
  e.WriteVarlengthBeginning(VarMsg::kSerializedFieldNumber,
                            payload->memory_size());
  if (payload->memory_size() >= std::numeric_limits<int>::max()) {
...@@ -140,7 +138,6 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
                       ::grpc::Slice::STEAL_REF);
    num_slices = 4;
  }
  ::grpc::ByteBuffer tmp(&slices[0], num_slices);
  msg->Swap(&tmp);
}
...@@ -156,6 +153,19 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
  *trainer_id = resp.GetTrainerId();
}
void DeserializeRecvFromByteBuffer(const ::grpc::ByteBuffer& msg,
const platform::DeviceContext& ctx,
const framework::Scope* scope,
framework::Variable** var, int* trainer_id) {
platform::RecordRPCEvent record_event("deserial");
operators::distributed::GRPCVariableResponse resp(scope, &ctx);
PADDLE_ENFORCE_EQ(
resp.Parse(msg), 0,
platform::errors::InvalidArgument("parse bytebuffer to tensor error!"));
*var = resp.GetRecvVar();
*trainer_id = resp.GetTrainerId();
}
}  // namespace distributed
}  // namespace operators
}  // namespace paddle
...@@ -47,6 +47,11 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
                               const framework::Scope* scope,
                               framework::Variable** var, int* trainer_id);
void DeserializeRecvFromByteBuffer(const ::grpc::ByteBuffer& msg,
                                   const platform::DeviceContext& ctx,
                                   const framework::Scope* scope,
                                   framework::Variable** var, int* trainer_id);
}  // namespace distributed
}  // namespace operators
}  // namespace paddle
...@@ -28,6 +28,7 @@ DECLARE_int32(rpc_retry_bind_port);
namespace paddle {
namespace operators {
namespace distributed {
enum CallStatus { PROCESS = 0, FINISH };
// reference:
...@@ -433,6 +434,51 @@ class RequestNotify final : public RequestBase {
  ServerAsyncResponseWriter<sendrecv::VoidMessage> responder_;
};
class RequestSendAndRecv final : public RequestBase {
public:
explicit RequestSendAndRecv(GrpcService::AsyncService* service,
::grpc::ServerCompletionQueue* cq,
RequestHandler* request_handler, int req_id)
: RequestBase(service, cq, request_handler, req_id), responder_(&ctx_) {
request_.reset(new GRPCVariableResponse(
request_handler->scope(), request_handler->dev_ctx(),
request_handler->distributed_mode()));
int method_id =
static_cast<int>(distributed::GrpcMethod::kRequestSendAndRecv);
service_->RequestAsyncUnary(
method_id, &ctx_, request_.get(), &responder_, cq_, cq_,
reinterpret_cast<void*>(static_cast<intptr_t>(req_id)));
}
virtual ~RequestSendAndRecv() {}
std::string GetReqName() override { return request_->Varname(); }
void Process() override {
std::string in_var_name = request_->Varname();
std::string out_var_name = request_->OutVarname();
std::string table_name = request_->TableName();
int trainer_id = request_->GetTrainerId();
VLOG(4) << "RequestSendAndRecv, in_var_name: " << in_var_name
<< " out_var_name: " << out_var_name << " trainer: " << trainer_id;
auto scope = request_->GetMutableLocalScope();
auto invar = scope->FindVar(in_var_name);
framework::Variable* outvar = nullptr;
request_handler_->Handle(in_var_name, scope, invar, &outvar, trainer_id,
out_var_name, table_name);
SerializeToByteBuffer(out_var_name, outvar, *request_handler_->dev_ctx(),
&reply_);
Finish(reply_, &responder_);
}
protected:
std::shared_ptr<GRPCVariableResponse> request_;
::grpc::ByteBuffer reply_;
ServerAsyncResponseWriter<::grpc::ByteBuffer> responder_;
};
void AsyncGRPCServer::WaitServerReady() {
  VLOG(4) << "AsyncGRPCServer is waiting server ready";
  std::unique_lock<std::mutex> lock(this->mutex_ready_);
...@@ -586,6 +632,8 @@ void AsyncGRPCServer::TryToRegisterNewOne(const std::string& rpc_name,
    b = new RequestCheckpointNotify(service_.get(), cq.get(), handler, req_id);
  } else if (rpc_name == kRequestNotify) {
    b = new RequestNotify(service_.get(), cq.get(), handler, req_id);
  } else if (rpc_name == kRequestSendAndRecv) {
    b = new RequestSendAndRecv(service_.get(), cq.get(), handler, req_id);
  } else {
    PADDLE_ENFORCE(false, "not supported rpc");
  }
......
...@@ -85,10 +85,12 @@ enum class GrpcMethod {
  kGetMonomerVariable,
  kGetMonomerBarrier,
  kRequestNotify,
  kRequestSendAndRecv,
  // when you add new handler, change kGrpcNumMethods at the same time!
};
static const int kGrpcNumMethods =
    static_cast<int>(GrpcMethod::kRequestNotify) + 1;
    static_cast<int>(GrpcMethod::kRequestSendAndRecv) + 1;
inline const char* GrpcMethodName(GrpcMethod id) {
  switch (id) {
...@@ -108,6 +110,8 @@ inline const char* GrpcMethodName(GrpcMethod id) {
      return "/sendrecv.SendRecvService/CheckpointNotify";
    case GrpcMethod::kRequestNotify:
      return "/sendrecv.SendRecvService/DistributeNotify";
    case GrpcMethod::kRequestSendAndRecv:
      return "/sendrecv.SendRecvService/SendAndRecvVariable";
  }
  // Shouldn't be reached.
......
...@@ -46,6 +46,7 @@ constexpr char kRequestCheckpoint[] = "RequestCheckpoint";
constexpr char kRequestPassBarrier[] = "RequestPassBarrier";
constexpr char kRequestGetNoBarrier[] = "GetVariableNoBarrier";
constexpr char kRequestNotify[] = "RequestNotify";
constexpr char kRequestSendAndRecv[] = "RequestSendAndRecv";
constexpr char kSendRPC[] = "SendRPC";
constexpr char kGetRPC[] = "GetRPC";
...@@ -57,6 +58,7 @@ constexpr char kFetchBarrierRPC[] = "FetchBarrierRPC";
constexpr char kSendMonomerFetchBarrierRPC[] = "SendMonomerFetchBarrierRPC";
constexpr char kSendCompleteRPC[] = "SendCompleteRPC";
constexpr char kCheckPointNotifyRPC[] = "CheckPointNotifyRPC";
constexpr char kSendAndRecvRPC[] = "SendAndRecvRPC";
constexpr int64_t kPrefetchTimeout = 60000;
#define LISTEN_TERMINATE_MESSAGE "TERMINATE@RECV"
......
...@@ -325,6 +325,22 @@ bool RequestNotifyHandler::Handle(const std::string &varname,
  return true;
}
bool RequestSendAndRecvHandler::Handle(const std::string &varname,
framework::Scope *Scope,
framework::Variable *var,
framework::Variable **outvar,
const int trainer_id,
const std::string &out_var_name,
const std::string &table_name) {
VLOG(3) << "SendAndRecvHandle: " << varname
<< " out_var_name: " << out_var_name
<< " , trainer_id: " << trainer_id;
executor_->RunPreparedContext((*grad_to_prepared_ctx_)[varname].get(), Scope);
*outvar = Scope->FindVar(out_var_name);
return true;
}
}  // namespace distributed
}  // namespace operators
}  // namespace paddle
...@@ -176,6 +176,17 @@ class RequestNotifyHandler final : public RequestHandler {
  std::unordered_map<int, int64_t> decay_counters;
};
class RequestSendAndRecvHandler final : public RequestHandler {
public:
explicit RequestSendAndRecvHandler(int distributed_mode)
: RequestHandler(distributed_mode) {}
virtual ~RequestSendAndRecvHandler() {}
bool Handle(const std::string& varname, framework::Scope* Scope,
framework::Variable* var, framework::Variable** outvar,
const int trainer_id, const std::string& out_var_name = "",
const std::string& table_name = "") override;
};
}  // namespace distributed
}  // namespace operators
}  // namespace paddle
...@@ -85,6 +85,12 @@ class RPCClient {
      const framework::Scope& scope, const std::string& var_name,
      int64_t time_out = FLAGS_rpc_deadline) = 0;
  virtual VarHandlePtr AsyncSendAndRecv(
      const std::string& ep, const platform::DeviceContext& ctx,
      const framework::Scope& scope, const std::string& send_var_name,
      const std::string& recv_var_name, const std::string& table_name = "",
      int64_t time_out = FLAGS_rpc_deadline) = 0;
  virtual VarHandlePtr AsyncSendComplete(
      const std::string& ep, int64_t time_out = FLAGS_rpc_deadline) = 0;
......
...@@ -35,27 +35,24 @@ namespace platform = paddle::platform;
namespace distributed = paddle::operators::distributed;
USE_NO_KERNEL_OP(lookup_sparse_table_read);
USE_OP(scale);
std::unique_ptr<distributed::RPCServer> g_rpc_service;
std::unique_ptr<distributed::RequestHandler> g_req_handler;
framework::BlockDesc* AppendPrefetchBlcok(framework::ProgramDesc* program) {
  auto root_block = program->MutableBlock(0);
  auto* block = program->AppendBlock(*root_block);
  framework::VariableNameMap input({{"W", {"w"}}, {"Ids", {"ids"}}});
  framework::VariableNameMap output({{"Output", {"out"}}});
  auto op = block->AppendOp();
  op->SetType("lookup_sparse_table_read");
  op->SetInput("W", {"w"});
  op->SetInput("Ids", {"ids"});
  op->SetOutput("Out", {"out"});
  op->SetAttr("tablename", {"w"});
  op->SetAttr("value_names", {"Param"});
  auto& out = *root_block->Var("out");
  out.SetType(framework::proto::VarType::LOD_TENSOR);
  out.SetShape({10, 10});
  return block;
}
framework::BlockDesc* AppendSendAndRecvBlock(framework::ProgramDesc* program) {
  auto root_block = program->MutableBlock(0);
  auto* block = program->AppendBlock(*root_block);
  framework::OpDesc* op = block->AppendOp();
  op->SetType("scale");
  op->SetInput("X", {"x"});
  op->SetOutput("Out", {"res"});
  op->SetAttr("scale", 0.5f);
  auto& out = *root_block->Var("res");
  out.SetType(framework::proto::VarType::LOD_TENSOR);
  out.SetShape({1, 10});
  return block;
}
...@@ -69,6 +66,12 @@ void CreateVarsOnScope(framework::Scope* scope, platform::CPUPlace* place) {
  auto ids_var = scope->Var("ids");
  ids_var->GetMutable<framework::LoDTensor>();
  auto x_var = scope->Var("x");
  x_var->GetMutable<framework::LoDTensor>();
  auto res_var = scope->Var("res");
  res_var->GetMutable<framework::LoDTensor>();
}
void InitTensorsOnClient(framework::Scope* scope, platform::CPUPlace* place,
...@@ -78,6 +81,11 @@ void InitTensorsOnClient(framework::Scope* scope, platform::CPUPlace* place,
  int64_t* ids_ptr =
      ids_var->mutable_data<int64_t>(framework::DDim({rows_numel, 1}), *place);
  for (int64_t i = 0; i < rows_numel; ++i) ids_ptr[i] = i * 2;
  auto x_var = scope->Var("x")->GetMutable<framework::LoDTensor>();
  float* x_ptr =
      x_var->mutable_data<float>(framework::DDim({1, rows_numel}), *place);
  for (int64_t i = 0; i < rows_numel; ++i) x_ptr[i] = 1.0;
}
void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place,
...@@ -124,6 +132,38 @@ void StartServer(const std::string& rpc_name) {
  server_thread.join();
}
void StartSendAndRecvServer(const std::string& rpc_name) {
framework::ProgramDesc program;
framework::Scope scope;
platform::CPUPlace place;
framework::Executor exe(place);
platform::CPUDeviceContext ctx(place);
auto block = AppendSendAndRecvBlock(&program);
std::string in_var_name("x");
std::vector<int> prefetch_block_ids{block->ID()};
auto prepared = exe.Prepare(program, prefetch_block_ids);
InitTensorsOnServer(&scope, &place, 10);
std::unordered_map<std::string,
std::shared_ptr<framework::ExecutorPrepareContext>>
grad_to_prepared_ctx;
grad_to_prepared_ctx[in_var_name] = prepared[0];
g_req_handler->SetProgram(&program);
g_req_handler->SetGradToPreparedCtx(&grad_to_prepared_ctx);
g_req_handler->SetDevCtx(&ctx);
g_req_handler->SetScope(&scope);
g_req_handler->SetExecutor(&exe);
g_rpc_service->RegisterRPC(rpc_name, g_req_handler.get());
g_req_handler->SetRPCServer(g_rpc_service.get());
std::thread server_thread(
std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get()));
server_thread.join();
}
TEST(COMPLETE, CPU) {
  setenv("http_proxy", "", 1);
  setenv("https_proxy", "", 1);
...@@ -147,3 +187,46 @@ TEST(COMPLETE, CPU) {
  g_rpc_service.reset(nullptr);
  g_req_handler.reset(nullptr);
}
TEST(SENDANDRECV, CPU) {
setenv("http_proxy", "", 1);
setenv("https_proxy", "", 1);
g_req_handler.reset(new distributed::RequestSendAndRecvHandler(
distributed::DistributedMode::kAsync));
g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1));
distributed::RPCClient* client =
distributed::RPCClient::GetInstance<RPCCLIENT_T>(0);
PADDLE_ENFORCE_NE(client, nullptr,
platform::errors::InvalidArgument(
"Client Start Fail, Check Your Code & Env"));
std::thread server_thread(StartSendAndRecvServer,
distributed::kRequestSendAndRecv);
g_rpc_service->WaitServerReady();
int port = g_rpc_service->GetSelectedPort();
std::string ep = paddle::string::Sprintf("127.0.0.1:%d", port);
framework::Scope scope;
platform::CPUPlace place;
platform::CPUDeviceContext ctx(place);
// create var on local scope
int64_t rows_numel = 10;
InitTensorsOnClient(&scope, &place, rows_numel);
std::string in_var_name("x");
std::string out_var_name("res");
client->AsyncSendAndRecv(ep, ctx, scope, in_var_name, out_var_name);
client->Wait();
auto var = scope.Var(out_var_name);
auto value = var->GetMutable<framework::LoDTensor>();
auto ptr = value->mutable_data<float>(place);
for (int64_t i = 0; i < rows_numel; ++i) {
EXPECT_EQ(ptr[i], 0.5);
}
g_rpc_service->ShutDown();
server_thread.join();
LOG(INFO) << "begin reset";
g_rpc_service.reset(nullptr);
g_req_handler.reset(nullptr);
}
...@@ -29,7 +29,7 @@ service SendRecvService {
  rpc CheckpointNotify(VariableMessage) returns (VoidMessage) {}
  rpc DistributeNotify(VariableMessage) returns (VoidMessage) {}
  rpc SendAndRecvVariable(VariableMessage) returns (VariableMessage) {}
  rpc GetMonomerVariable(VariableMessage) returns (VariableMessage) {}
  rpc GetMonomerBarrier(VariableMessage) returns (VoidMessage) {}
}
......
...@@ -96,6 +96,13 @@ class VariableResponse {
    return scope_->FindVar(meta_.varname());
  }
  framework::Variable* GetRecvVar() {
    if (create_scope_) {
      return local_scope_->Var(meta_.out_varname());
    }
    return scope_->FindVar(meta_.out_varname());
  }
  int GetTrainerId() { return static_cast<int>(meta_.trainer_id()); }
 protected:
......
...@@ -268,7 +268,6 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
  size_t num_blocks = program->Size();
  PADDLE_ENFORCE_GE(num_blocks, 2,
                    "server program should have at least 2 blocks");
  std::vector<int> block_list;
  for (size_t blkid = 1; blkid < num_blocks; ++blkid) {
    block_list.push_back(blkid);
...@@ -295,6 +294,7 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
  request_send_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
  request_get_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
  request_prefetch_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
  request_send_and_recv_handler_->SetGradToPreparedCtx(&grad_to_prepared_ctx);
  while (true) {
    if (rpc_service_->IsExit()) {
...@@ -394,6 +394,8 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
      new distributed::RequestGetNoBarrierHandler());
  request_notify_handler_.reset(
      new distributed::RequestNotifyHandler(distributed_mode, fan_in));
request_send_and_recv_handler_.reset(
new distributed::RequestSendAndRecvHandler(distributed_mode));
  rpc_service_->RegisterRPC(distributed::kRequestSend,
                            request_send_handler_.get(), rpc_send_thread_num);
...@@ -408,6 +410,9 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
                            request_get_no_barrier_handler_.get());
  rpc_service_->RegisterRPC(distributed::kRequestNotify,
                            request_notify_handler_.get(), rpc_send_thread_num);
  rpc_service_->RegisterRPC(distributed::kRequestSendAndRecv,
                            request_send_and_recv_handler_.get(),
                            rpc_get_thread_num);
  auto optimize_blocks =
      Attr<std::vector<framework::BlockDesc *>>(kOptimizeBlocks);
...@@ -416,6 +421,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
                        "optimize blocks is less than 1. Optimize blocks "
                        "should be 1 at least on the pserver side."));
  auto *program = optimize_blocks[0]->Program();
  framework::Executor executor(dev_place);
  std::shared_ptr<framework::ExecutorPrepareContext> ckpt_pre_context = nullptr;
...@@ -488,6 +494,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope, ...@@ -488,6 +494,7 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
f(request_checkpoint_handler_.get()); f(request_checkpoint_handler_.get());
f(request_get_no_barrier_handler_.get()); f(request_get_no_barrier_handler_.get());
f(request_notify_handler_.get()); f(request_notify_handler_.get());
f(request_send_and_recv_handler_.get());
// register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers // register SIGINT(from ctrl+C) and SIGTERM(from kill) signal handlers
signal(SIGINT, SignalHandler::StopAndExit); signal(SIGINT, SignalHandler::StopAndExit);
......
...@@ -99,6 +99,8 @@ class ListenAndServOp : public framework::OperatorBase { ...@@ -99,6 +99,8 @@ class ListenAndServOp : public framework::OperatorBase {
mutable std::shared_ptr<distributed::RequestHandler> mutable std::shared_ptr<distributed::RequestHandler>
request_checkpoint_handler_; request_checkpoint_handler_;
mutable std::shared_ptr<distributed::RequestHandler> request_notify_handler_; mutable std::shared_ptr<distributed::RequestHandler> request_notify_handler_;
mutable std::shared_ptr<distributed::RequestHandler>
request_send_and_recv_handler_;
mutable std::shared_ptr<std::thread> server_thread_; mutable std::shared_ptr<std::thread> server_thread_;
mutable std::vector<std::string> sparse_vars_; mutable std::vector<std::string> sparse_vars_;
......
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <future> // NOLINT
#include <ostream>
#include "paddle/fluid/framework/blocking_queue.h"
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/distributed/communicator.h"
#include "paddle/fluid/operators/distributed/communicator_common.h"
#include "paddle/fluid/operators/distributed/distributed.h"
#include "paddle/fluid/operators/distributed/parameter_send.h"
#include "paddle/fluid/operators/distributed_ops/send_recv_util.h"
#include "paddle/fluid/platform/profiler.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class SendAndRecvKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto& scope = ctx.scope();
const auto& place = ctx.GetPlace();
auto send_var_name = ctx.Attr<std::string>("send_var_name");
auto recv_var_name = ctx.Attr<std::string>("recv_var_name");
auto epmap = ctx.Attr<std::string>("endpoint");
auto trainer_id = ctx.Attr<int>("trainer_id");
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto& context = *pool.Get(place);
distributed::RPCClient* rpc_client =
distributed::RPCClient::GetInstance<RPCCLIENT_T>(trainer_id);
VLOG(3) << "SendAndRecvOp Send_var_name: " << send_var_name
<< " Recv_var_name: " << recv_var_name;
distributed::VarHandlePtr rets = rpc_client->AsyncSendAndRecv(
epmap, context, scope, send_var_name, recv_var_name);
rets->Wait();
}
};
class SendAndRecvOp : public framework::OperatorWithKernel {
public:
using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override {}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext& ctx) const override {
auto data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
return framework::OpKernelType(data_type, platform::CPUPlace());
}
};
class SendAndRecvOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() {
AddInput("X", "Tensor Input variable to be sent").AsDuplicable();
AddOutput("Out", "Tensor Output varibale to be recv").AsDuplicable();
AddAttr<std::string>("send_var_name", "Send Tensor's name")
.SetDefault(std::string(""));
AddAttr<std::string>("recv_var_name", "Recv Tensor's name")
.SetDefault(std::string(""));
AddAttr<int>("trainer_id", "trainer id from 0 ~ worker_num.").SetDefault(0);
AddAttr<std::string>("endpoint", "Server endpoint")
.SetDefault({"127.0.0.1:6164"});
AddComment(R"DOC(
SendAndRecv operator
This operator sends the specified variable to the listen_and_serv op on the parameter server,
and receives the requested variable from the parameter server into the sender's scope.
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(send_and_recv, ops::SendAndRecvOp, ops::SendAndRecvOpMaker);
REGISTER_OP_CPU_KERNEL(
send_and_recv,
ops::SendAndRecvKernel<paddle::platform::CPUDeviceContext, float>)
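A minimal, hypothetical sketch of how the new send_and_recv op could be appended to a fluid program from Python, assuming a Paddle build that contains this operator. The variable names, shapes, and endpoint are placeholders; only the attribute names come from SendAndRecvOpMaker above.
# --- illustrative usage sketch, not part of the commit ---
import paddle.fluid as fluid

main = fluid.Program()
with fluid.program_guard(main):
    x = fluid.data(name="x", shape=[-1, 8], dtype="float32")      # variable to send
    out = fluid.data(name="out", shape=[-1, 8], dtype="float32")  # variable to receive

    main.global_block().append_op(
        type="send_and_recv",
        inputs={"X": [x]},
        outputs={"Out": [out]},
        attrs={
            "send_var_name": "x",
            "recv_var_name": "out",
            "trainer_id": 0,
            "endpoint": "127.0.0.1:6164",  # default endpoint from the op maker
        })
Actually running such a program would additionally require a listen_and_serv server reachable at the given endpoint.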
...@@ -24,49 +24,69 @@ class AdadeltaOp : public framework::OperatorWithKernel { ...@@ -24,49 +24,69 @@ class AdadeltaOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext *ctx) const override { void InferShape(framework::InferShapeContext *ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("Param"), PADDLE_ENFORCE_EQ(ctx->HasInput("Param"), true,
"Input(Param) of AdadeltaOp should not be null."); platform::errors::InvalidArgument(
PADDLE_ENFORCE(ctx->HasInput("Grad"), "Input(Param) of AdadeltaOp should not be null."));
"Input(Grad) of AdadeltaOp should not be null."); PADDLE_ENFORCE_EQ(ctx->HasInput("Grad"), true,
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredGrad"), platform::errors::InvalidArgument(
"Input(AvgSquaredGrad) of AdadeltaOp should not be null."); "Input(Grad) of AdadeltaOp should not be null."));
PADDLE_ENFORCE(ctx->HasInput("AvgSquaredUpdate"), PADDLE_ENFORCE_EQ(
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null."); ctx->HasInput("AvgSquaredGrad"), true,
PADDLE_ENFORCE( platform::errors::InvalidArgument(
"Input(AvgSquaredGrad) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasInput("AvgSquaredUpdate"), true,
platform::errors::InvalidArgument(
"Input(AvgSquaredUpdate) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Param").front() == ctx->GetInputsVarType("Param").front() ==
framework::proto::VarType::LOD_TENSOR, framework::proto::VarType::LOD_TENSOR,
true,
platform::errors::InvalidArgument(
"The input var's type should be LoDTensor, but the received is %s", "The input var's type should be LoDTensor, but the received is %s",
ctx->Inputs("Param").front(), ctx->GetInputsVarType("Param").front()); ctx->Inputs("Param").front(),
PADDLE_ENFORCE( ctx->GetInputsVarType("Param").front()));
PADDLE_ENFORCE_EQ(
ctx->GetInputsVarType("Grad").front() == ctx->GetInputsVarType("Grad").front() ==
framework::proto::VarType::LOD_TENSOR, framework::proto::VarType::LOD_TENSOR,
true,
platform::errors::InvalidArgument(
"The input var's type should be LoDTensor, but the received is %s", "The input var's type should be LoDTensor, but the received is %s",
ctx->Inputs("Grad").front(), ctx->GetInputsVarType("Grad").front()); ctx->Inputs("Grad").front(),
ctx->GetInputsVarType("Grad").front()));
PADDLE_ENFORCE(ctx->HasOutput("ParamOut"), PADDLE_ENFORCE_EQ(
"Output(ParamOut) of AdadeltaOp should not be null."); ctx->HasOutput("ParamOut"), true,
PADDLE_ENFORCE( platform::errors::InvalidArgument(
ctx->HasOutput("AvgSquaredGradOut"), "Output(ParamOut) of AdadeltaOp should not be null."));
"Output(AvgSquaredGradOut) of AdadeltaOp should not be null."); PADDLE_ENFORCE_EQ(
PADDLE_ENFORCE( ctx->HasOutput("AvgSquaredGradOut"), true,
ctx->HasOutput("AvgSquaredUpdateOut"), platform::errors::InvalidArgument(
"Output(AvgSquaredUpdateOut) of AdadeltaOp should not be null."); "Output(AvgSquaredGradOut) of AdadeltaOp should not be null."));
PADDLE_ENFORCE_EQ(
ctx->HasOutput("AvgSquaredUpdateOut"), true,
platform::errors::InvalidArgument(
"Output(AvgSquaredUpdateOut) of AdadeltaOp should not be null."));
auto param_dim = ctx->GetInputDim("Param"); auto param_dim = ctx->GetInputDim("Param");
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
param_dim, ctx->GetInputDim("Grad"), param_dim, ctx->GetInputDim("Grad"),
"param and grad input of AdadeltaOp should have same dimension"); "param and grad input of AdadeltaOp should have same dimension");
PADDLE_ENFORCE_NE(framework::product(ctx->GetInputDim("AvgSquaredGrad")), 0, PADDLE_ENFORCE_NE(
framework::product(ctx->GetInputDim("AvgSquaredGrad")), 0,
platform::errors::InvalidArgument(
"Maybe the Input variable AvgSquaredGrad has not " "Maybe the Input variable AvgSquaredGrad has not "
"been initialized. You may need to confirm if you put " "been initialized. You may need to confirm if you put "
"exe.run(startup_program) after optimizer.minimize " "exe.run(startup_program) after optimizer.minimize "
"function."); "function."));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredGrad"), PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredGrad"),
platform::errors::InvalidArgument(
"Param and AvgSquaredGrad input of AdadeltaOp " "Param and AvgSquaredGrad input of AdadeltaOp "
"should have same dimension"); "should have same dimension"));
PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredUpdate"), PADDLE_ENFORCE_EQ(param_dim, ctx->GetInputDim("AvgSquaredUpdate"),
platform::errors::InvalidArgument(
"Param and AvgSquaredUpdate input of AdadeltaOp " "Param and AvgSquaredUpdate input of AdadeltaOp "
"should have same dimension"); "should have same dimension"));
ctx->SetOutputDim("ParamOut", param_dim); ctx->SetOutputDim("ParamOut", param_dim);
ctx->SetOutputDim("AvgSquaredGradOut", param_dim); ctx->SetOutputDim("AvgSquaredGradOut", param_dim);
......
...@@ -24,17 +24,19 @@ class AdadeltaOpKernel : public framework::OpKernel<T> { ...@@ -24,17 +24,19 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
const auto* param_var = ctx.InputVar("Param"); const auto* param_var = ctx.InputVar("Param");
PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(), PADDLE_ENFORCE_EQ(param_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, " "The Var(%s)'s type should be LoDTensor, "
"but the received is %s", "but the received is %s",
ctx.InputNames("Param").front(), ctx.InputNames("Param").front(),
framework::ToTypeName(param_var->Type())); framework::ToTypeName(param_var->Type())));
const auto* grad_var = ctx.InputVar("Grad"); const auto* grad_var = ctx.InputVar("Grad");
PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(), PADDLE_ENFORCE_EQ(grad_var->IsType<framework::LoDTensor>(), true,
platform::errors::InvalidArgument(
"The Var(%s)'s type should be LoDTensor, " "The Var(%s)'s type should be LoDTensor, "
"but the received is %s", "but the received is %s",
ctx.InputNames("Grad").front(), ctx.InputNames("Grad").front(),
framework::ToTypeName(grad_var->Type())); framework::ToTypeName(grad_var->Type())));
auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut"); auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
auto avg_squared_grad_out_tensor = auto avg_squared_grad_out_tensor =
......
...@@ -23,22 +23,27 @@ class TopkOp : public framework::OperatorWithKernel { ...@@ -23,22 +23,27 @@ class TopkOp : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
PADDLE_ENFORCE(ctx->HasInput("X"), PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
"Input(X) of TopkOp should not be null."); platform::errors::InvalidArgument(
PADDLE_ENFORCE(ctx->HasOutput("Out"), "Input(X) of TopkOp should not be null."));
"Output(Out) of TopkOp should not be null."); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
PADDLE_ENFORCE(ctx->HasOutput("Indices"), platform::errors::InvalidArgument(
"Output(Indices) of TopkOp should not be null."); "Output(Out) of TopkOp should not be null."));
PADDLE_ENFORCE_EQ(ctx->HasOutput("Indices"), true,
platform::errors::InvalidArgument(
"Output(Indices) of TopkOp should not be null."));
auto input_dims = ctx->GetInputDim("X"); auto input_dims = ctx->GetInputDim("X");
const int k = static_cast<int>(ctx->Attrs().Get<int>("k")); const int k = static_cast<int>(ctx->Attrs().Get<int>("k"));
PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); PADDLE_ENFORCE_GE(k, 1, "k must >= 1");
PADDLE_ENFORCE_GE(input_dims.size(), 1, "input must have >= 1d shape"); PADDLE_ENFORCE_GE(input_dims.size(), 1, platform::errors::InvalidArgument(
"input must have >= 1d shape"));
if (ctx->IsRuntime()) { if (ctx->IsRuntime()) {
PADDLE_ENFORCE_GE(input_dims[input_dims.size() - 1], k, PADDLE_ENFORCE_GE(
"input must have >= k columns"); input_dims[input_dims.size() - 1], k,
platform::errors::InvalidArgument("input must have >= k columns"));
} }
framework::DDim dims = input_dims; framework::DDim dims = input_dims;
......
...@@ -43,8 +43,9 @@ template <typename DeviceContext, typename T> ...@@ -43,8 +43,9 @@ template <typename DeviceContext, typename T>
class TopkOpCUDAKernel : public framework::OpKernel<T> { class TopkOpCUDAKernel : public framework::OpKernel<T> {
public: public:
void Compute(const framework::ExecutionContext& ctx) const override { void Compute(const framework::ExecutionContext& ctx) const override {
PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()), PADDLE_ENFORCE_EQ(
"It must use CUDAPlace."); platform::is_gpu_place(ctx.GetPlace()), true,
platform::errors::InvalidArgument("It must use CUDAPlace."));
auto* input = ctx.Input<Tensor>("X"); auto* input = ctx.Input<Tensor>("X");
auto* output = ctx.Output<Tensor>("Out"); auto* output = ctx.Output<Tensor>("Out");
auto* indices = ctx.Output<Tensor>("Indices"); auto* indices = ctx.Output<Tensor>("Indices");
......
...@@ -206,9 +206,9 @@ void BindInferenceApi(py::module *m) { ...@@ -206,9 +206,9 @@ void BindInferenceApi(py::module *m) {
BindMkldnnQuantizerConfig(m); BindMkldnnQuantizerConfig(m);
#endif #endif
m->def("create_paddle_predictor", m->def("create_paddle_predictor",
&paddle::CreatePaddlePredictor<AnalysisConfig>); &paddle::CreatePaddlePredictor<AnalysisConfig>, py::arg("config"));
m->def("create_paddle_predictor", m->def("create_paddle_predictor",
&paddle::CreatePaddlePredictor<NativeConfig>); &paddle::CreatePaddlePredictor<NativeConfig>, py::arg("config"));
m->def("paddle_dtype_size", &paddle::PaddleDtypeSize); m->def("paddle_dtype_size", &paddle::PaddleDtypeSize);
m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes); m->def("paddle_tensor_to_bytes", &SerializePDTensorToBytes);
} }
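With py::arg("config") added, the binding can now be called with a keyword argument from Python. A hedged sketch, assuming the binding is reachable via paddle.fluid.core and that "./model_dir" holds an inference model:
# --- illustrative example, not part of the commit ---
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

config = AnalysisConfig("./model_dir")               # placeholder model directory
predictor = create_paddle_predictor(config=config)   # keyword call enabled by py::arg("config")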
......
...@@ -1399,6 +1399,9 @@ function main() { ...@@ -1399,6 +1399,9 @@ function main() {
local CMD=$1 local CMD=$1
local parallel_number=$2 local parallel_number=$2
init init
if [ "$CMD" != "assert_file_approvals" ];then
python ${PADDLE_ROOT}/tools/summary_env.py
fi
case $CMD in case $CMD in
build_only) build_only)
cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number}
......
...@@ -30,8 +30,11 @@ __all__ = ["spawn"] ...@@ -30,8 +30,11 @@ __all__ = ["spawn"]
# dygraph parallel apis # dygraph parallel apis
__all__ += [ __all__ += [
"init_parallel_env", "get_rank", "get_world_size", "prepare_context", "init_parallel_env",
"ParallelEnv" "get_rank",
"get_world_size",
"prepare_context",
"ParallelEnv",
] ]
# collective apis # collective apis
......
...@@ -18,16 +18,15 @@ from .base.distributed_strategy import DistributedStrategy ...@@ -18,16 +18,15 @@ from .base.distributed_strategy import DistributedStrategy
from .base.fleet_base import Fleet from .base.fleet_base import Fleet
from .base.util_factory import UtilBase from .base.util_factory import UtilBase
from .dataset import * from .dataset import *
#from . import metrics
__all__ = [ __all__ = [
"DistributedStrategy", "DistributedStrategy",
"UtilBase", "UtilBase",
"DatasetFactory", "DatasetFactory",
"DatasetBase",
"InMemoryDataset",
"QueueDataset",
"UserDefinedRoleMaker", "UserDefinedRoleMaker",
"PaddleCloudRoleMaker", "PaddleCloudRoleMaker",
"Fleet",
] ]
fleet = Fleet() fleet = Fleet()
......
...@@ -17,6 +17,8 @@ from paddle.distributed.fleet.proto import distributed_strategy_pb2 ...@@ -17,6 +17,8 @@ from paddle.distributed.fleet.proto import distributed_strategy_pb2
from paddle.fluid.framework import Variable, set_flags, core from paddle.fluid.framework import Variable, set_flags, core
import google.protobuf.text_format import google.protobuf.text_format
__all__ = ["DistributedStrategy"]
def get_msg_dict(msg): def get_msg_dict(msg):
res_dict = {} res_dict = {}
......
...@@ -22,7 +22,7 @@ from .runtime_factory import RuntimeFactory ...@@ -22,7 +22,7 @@ from .runtime_factory import RuntimeFactory
from .util_factory import UtilFactory from .util_factory import UtilFactory
from paddle.fluid.wrapped_decorator import wrap_decorator from paddle.fluid.wrapped_decorator import wrap_decorator
__all__ = ['Fleet'] #__all__ = ['Fleet']
def _inited_runtime_handler_(func): def _inited_runtime_handler_(func):
...@@ -200,7 +200,8 @@ class Fleet(object): ...@@ -200,7 +200,8 @@ class Fleet(object):
bool: True if this is a node of server, bool: True if this is a node of server,
False if not. False if not.
""" """
return self._role_maker.is_server() return self._role_maker.is_server(
) or self._role_maker._is_heter_worker()
@property @property
def util(self): def util(self):
......
...@@ -12,8 +12,6 @@ ...@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ["MetaOptimizerFactory"]
from ..meta_optimizers import * from ..meta_optimizers import *
meta_optimizer_names = list( meta_optimizer_names = list(
......
...@@ -14,15 +14,17 @@ ...@@ -14,15 +14,17 @@
"""Defination of Role Makers.""" """Defination of Role Makers."""
import os import os
import numpy as np import numpy as np
import warnings
from multiprocessing import Process, Manager from multiprocessing import Process, Manager
import paddle.fluid as fluid import paddle.fluid as fluid
__all__ = ['RoleMakerBase', 'UserDefinedRoleMaker', 'PaddleCloudRoleMaker'] #__all__ = ['UserDefinedRoleMaker', 'PaddleCloudRoleMaker']
class Role: class Role:
WORKER = 1 WORKER = 1
SERVER = 2 SERVER = 2
HETER_WORKER = 3
class RoleMakerBase(object): class RoleMakerBase(object):
...@@ -40,6 +42,11 @@ class RoleMakerBase(object): ...@@ -40,6 +42,11 @@ class RoleMakerBase(object):
self._role = None self._role = None
self._current_id = -1 self._current_id = -1
# for heter parameter server mode
self._heter_trainer_endpoints = []
self._heter_trainer_device = "CPU"
self._is_heter_parameter_server_mode = False
self._node_type = None self._node_type = None
self._node_type_comm = None self._node_type_comm = None
self._all_comm = None self._all_comm = None
...@@ -163,12 +170,58 @@ class RoleMakerBase(object): ...@@ -163,12 +170,58 @@ class RoleMakerBase(object):
""" """
print("warning: RoleMakerBase does not have barrier worker.") print("warning: RoleMakerBase does not have barrier worker.")
def _is_heter_worker(self):
"""
Return is_heter_worker() of current process
"""
warnings.warn("RoleMakerBase does not have function: _is_heter_worker.")
return False
def _heter_worker_num(self):
"""
Get current total heter-worker number.
Returns:
int: heter_worker number
"""
warnings.warn(
"RoleMakerBase does not have function: _heter_worker_num.")
return 0
def _get_heter_worker_endpoints(self):
"""
Returns:
string: all heter_trainers' endpoints
"""
assert self._heter_trainer_endpoints != []
return self._heter_trainer_endpoints
def _get_heter_worker_endpoint(self):
"""
Returns:
string: the corresponding heter_trainer's endpoint
e.g.: if we have 4 cpu-trainers (default) and 2 gpu-trainers (heter),
then No.0 and No.2 cpu-trainers will work with No.0 gpu-trainer
and No.1 and No.3 cpu-trainers will work with No.1 gpu-trainer
"""
assert self._heter_trainer_endpoints != []
return self._heter_trainer_endpoints[(self._current_id + 1) %
self._heter_worker_num()]
def _get_heter_worker_device(self):
"""
Returns:
string: heter_trainer's device of current node, e.g: CPU/GPU/XPU
"""
return self._heter_trainer_device.upper()
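To make the index arithmetic in _get_heter_worker_endpoint concrete, here is a small, self-contained illustration; the endpoint strings and trainer count are assumptions, not part of the commit:
# --- illustrative example, not part of the commit ---
heter_endpoints = ["10.0.0.1:7000", "10.0.0.2:7000"]  # hypothetical heter-trainer endpoints

# each cpu-trainer picks a heter endpoint by (current_id + 1) modulo the heter
# worker count, mirroring the expression in _get_heter_worker_endpoint above
for current_id in range(4):  # cpu-trainer ids 0..3
    paired = heter_endpoints[(current_id + 1) % len(heter_endpoints)]
    print("cpu-trainer", current_id, "->", paired)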
class PaddleCloudRoleMaker(RoleMakerBase): class PaddleCloudRoleMaker(RoleMakerBase):
def __init__(self, is_collective=False, **kwargs): def __init__(self, is_collective=False, **kwargs):
super(PaddleCloudRoleMaker, self).__init__() super(PaddleCloudRoleMaker, self).__init__()
self._is_collective = is_collective self._is_collective = is_collective
self._init_gloo = False #default no init gloo self._init_gloo = False # default no init gloo
self._kwargs = kwargs self._kwargs = kwargs
self._role_is_generated = False self._role_is_generated = False
...@@ -278,10 +331,7 @@ class PaddleCloudRoleMaker(RoleMakerBase): ...@@ -278,10 +331,7 @@ class PaddleCloudRoleMaker(RoleMakerBase):
""" """
get index of current node get index of current node
""" """
if self.is_server(): return self._current_id
return self.server_index()
elif self.is_worker():
return self.worker_index()
def worker_num(self): def worker_num(self):
""" """
...@@ -323,6 +373,22 @@ class PaddleCloudRoleMaker(RoleMakerBase): ...@@ -323,6 +373,22 @@ class PaddleCloudRoleMaker(RoleMakerBase):
self.generate_role() self.generate_role()
return self._server_endpoints return self._server_endpoints
def _heter_worker_num(self):
"""
get heter worker nums
"""
if not self._role_is_generated:
self.generate_role()
return self._heter_trainers_num
def _is_heter_worker(self):
"""
whether current process is heter worker
"""
if not self._role_is_generated:
self.generate_role()
return self._role == Role.HETER_WORKER
def _get_rank(self): def _get_rank(self):
""" """
get current rank in all workers and pservers get current rank in all workers and pservers
...@@ -342,17 +408,47 @@ class PaddleCloudRoleMaker(RoleMakerBase): ...@@ -342,17 +408,47 @@ class PaddleCloudRoleMaker(RoleMakerBase):
def _ps_env(self): def _ps_env(self):
try: try:
# Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set # Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set
# format: string(ip:port), eg. 127.0.0.1:6001 # format: string(ip:port,ip:port), eg. 127.0.0.1:6001,127.0.0.1:6002
self._server_endpoints = os.environ[ self._server_endpoints = os.getenv("PADDLE_PSERVERS_IP_PORT_LIST",
"PADDLE_PSERVERS_IP_PORT_LIST"].split(",") "").split(",")
assert self._server_endpoints != ""
self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS", self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS",
"").split(",") "").split(",")
assert self._server_endpoints != ""
trainers_num = int(os.environ["PADDLE_TRAINERS_NUM"]) trainers_num = int(os.environ["PADDLE_TRAINERS_NUM"])
training_role = os.environ["TRAINING_ROLE"] training_role = os.environ["TRAINING_ROLE"]
if training_role not in ["TRAINER", "PSERVER"]: if training_role not in ["TRAINER", "PSERVER", "HETER_TRAINER"]:
raise ValueError("TRAINING_ROLE must be PSERVER or TRAINER") raise ValueError(
"TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER, but get {}, please check your environment.".
format(training_role))
# For heter parameter server env setting
heter_trainer_eplist = os.getenv(
"PADDLE_HETER_TRAINER_IP_PORT_LIST", None)
heter_trainer_device = os.getenv("PADDLE_HETER_TRAINER_DEVICE",
None)
if heter_trainer_eplist and heter_trainer_device:
try:
heter_trainer_eplist = os.environ[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"].split(",")
except:
raise ValueError(
"Can not Find PADDLE_HETER_TRAINER_IP_PORT_LIST in env or its format doesn't match the requirement: 'IP:PORT,IP:PORT' ."
)
self._is_heter_parameter_server_mode = True
heter_trainers_num = len(heter_trainer_eplist)
current_node_device = heter_trainer_device.upper()
if current_node_device not in ["CPU", "GPU", "XPU"]:
raise ValueError(
"Heter Trainer doesn't support {} device now, please use CPU / GPU / XPU(KunLun)".
format(heter_trainer_device))
self._heter_trainer_device = current_node_device
else:
self._is_heter_parameter_server_mode = False
heter_trainers_num = 0
if training_role == "TRAINER": if training_role == "TRAINER":
role = Role.WORKER role = Role.WORKER
...@@ -365,17 +461,26 @@ class PaddleCloudRoleMaker(RoleMakerBase): ...@@ -365,17 +461,26 @@ class PaddleCloudRoleMaker(RoleMakerBase):
ip = os.environ["POD_IP"] ip = os.environ["POD_IP"]
self._cur_endpoint = ip + ":" + port self._cur_endpoint = ip + ":" + port
current_id = self._server_endpoints.index(self._cur_endpoint) current_id = self._server_endpoints.index(self._cur_endpoint)
elif training_role == "HETER_TRAINER":
role = Role.HETER_WORKER
cur_ip = os.environ["POD_IP"]
cur_port = os.environ["PADDLE_PORT"]
curr_endpoint = ":".join([cur_ip, cur_port])
current_id = heter_trainer_eplist.index(curr_endpoint)
else: else:
raise ValueError("TRAINING_ROLE must be PSERVER or TRAINER")
except ValueError as ve:
raise ValueError( raise ValueError(
"something wrong with PaddleCloud, please check environment") "TRAINING_ROLE must be PSERVER or TRAINER or HETER_TRAINER")
except ValueError as e:
raise ValueError(
"Something wrong with PaddleCloud, please check environment")
self._trainers_num = trainers_num self._trainers_num = trainers_num
self._role = role self._role = role
self._current_id = current_id self._current_id = current_id
self._node_num = len( self._node_num = len(
set([x.split(':')[0] for x in self._worker_endpoints])) set([x.split(':')[0] for x in self._worker_endpoints]))
self._heter_trainers_num = heter_trainers_num
self._heter_trainer_endpoints = heter_trainer_eplist
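For reference, a minimal sketch of the environment a HETER_TRAINER process would be launched with, based only on the variables read in _ps_env above; all addresses and counts are placeholders:
# --- illustrative environment setup, not part of the commit ---
import os

os.environ["PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:6001,127.0.0.1:6002"
os.environ["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:6170,127.0.0.1:6171"
os.environ["PADDLE_TRAINERS_NUM"] = "2"
os.environ["TRAINING_ROLE"] = "HETER_TRAINER"
os.environ["PADDLE_HETER_TRAINER_IP_PORT_LIST"] = "127.0.0.1:7000,127.0.0.1:7001"
os.environ["PADDLE_HETER_TRAINER_DEVICE"] = "gpu"
os.environ["POD_IP"] = "127.0.0.1"
os.environ["PADDLE_PORT"] = "7000"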
def _collective_env(self): def _collective_env(self):
self._current_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) self._current_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
......
...@@ -15,24 +15,10 @@ from .amp_optimizer import AMPOptimizer ...@@ -15,24 +15,10 @@ from .amp_optimizer import AMPOptimizer
from .recompute_optimizer import RecomputeOptimizer from .recompute_optimizer import RecomputeOptimizer
from .gradient_merge_optimizer import GradientMergeOptimizer from .gradient_merge_optimizer import GradientMergeOptimizer
from .graph_execution_optimizer import GraphExecutionOptimizer from .graph_execution_optimizer import GraphExecutionOptimizer
from .async_optimizer import AsyncMetaOptimizer from .parameter_server_optimizer import ParameterServerOptimizer
from .pipeline_optimizer import PipelineOptimizer from .pipeline_optimizer import PipelineOptimizer
from .localsgd_optimizer import LocalSGDOptimizer from .localsgd_optimizer import LocalSGDOptimizer
from .lars_optimizer import LarsOptimizer from .lars_optimizer import LarsOptimizer
from .async_graph_execution_optimizer import AsyncGraphExecutionOptimizer from .parameter_server_graph_optimizer import ParameterServerGraphOptimizer
from .dgc_optimizer import DGCOptimizer from .dgc_optimizer import DGCOptimizer
from .lamb_optimizer import LambOptimizer from .lamb_optimizer import LambOptimizer
__all__ = [
'AMPOptimizer',
'RecomputeOptimizer',
'GradientMergeOptimizer',
'AsyncMetaOptimizer',
'GraphExecutionOptimizer',
'PipelineOptimizer',
'LocalSGDOptimizer',
'LarsOptimizer',
'AsyncGraphExecutionOptimizer',
'DGCOptimizer',
'LambOptimizer',
]
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
import paddle.fluid.contrib.mixed_precision as mixed_precision import paddle.fluid.contrib.mixed_precision as mixed_precision
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["AMPOptimizer"]
class AMPOptimizer(MetaOptimizerBase): class AMPOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer ...@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
import logging import logging
__all__ = ["DGCOptimizer"]
class DGCOptimizer(MetaOptimizerBase): class DGCOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -14,10 +14,6 @@ ...@@ -14,10 +14,6 @@
from paddle.fluid.optimizer import GradientMergeOptimizer as GM from paddle.fluid.optimizer import GradientMergeOptimizer as GM
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["GradientMergeOptimizer"]
# amp + gradient merge + lamb
class GradientMergeOptimizer(MetaOptimizerBase): class GradientMergeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -16,8 +16,6 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB ...@@ -16,8 +16,6 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
import logging import logging
__all__ = ["LambOptimizer"]
class LambOptimizer(MetaOptimizerBase): class LambOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer ...@@ -15,8 +15,6 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
import logging import logging
__all__ = ["LarsOptimizer"]
class LarsOptimizer(MetaOptimizerBase): class LarsOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -12,8 +12,6 @@ ...@@ -12,8 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ["MetaOptimizerBase"]
from paddle.fluid.optimizer import Optimizer from paddle.fluid.optimizer import Optimizer
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
from paddle import fluid from paddle import fluid
from paddle.fluid import compiler from paddle.fluid import compiler
from .async_optimizer import AsyncMetaOptimizer from .parameter_server_optimizer import ParameterServerOptimizer
class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer): class ParameterServerGraphOptimizer(ParameterServerOptimizer):
def __init__(self, optimizer): def __init__(self, optimizer):
super(AsyncGraphExecutionOptimizer, self).__init__(optimizer) super(ParameterServerGraphOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently # we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = [] self.meta_optimizers_white_list = []
...@@ -31,6 +31,9 @@ class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer): ...@@ -31,6 +31,9 @@ class AsyncGraphExecutionOptimizer(AsyncMetaOptimizer):
if self.role_maker.is_server(): if self.role_maker.is_server():
return False return False
if self.role_maker._is_heter_parameter_server_mode:
return False
return True return True
def _disable_strategy(self, dist_strategy): def _disable_strategy(self, dist_strategy):
......
...@@ -15,9 +15,9 @@ from paddle import fluid ...@@ -15,9 +15,9 @@ from paddle import fluid
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
class AsyncMetaOptimizer(MetaOptimizerBase): class ParameterServerOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
super(AsyncMetaOptimizer, self).__init__(optimizer) super(ParameterServerOptimizer, self).__init__(optimizer)
self.inner_opt = optimizer self.inner_opt = optimizer
# we do not allow meta optimizer to be inner optimizer currently # we do not allow meta optimizer to be inner optimizer currently
self.meta_optimizers_white_list = [] self.meta_optimizers_white_list = []
...@@ -68,6 +68,21 @@ class AsyncMetaOptimizer(MetaOptimizerBase): ...@@ -68,6 +68,21 @@ class AsyncMetaOptimizer(MetaOptimizerBase):
_startup = worker.init_from_server_pass(_startup, compiled_config) _startup = worker.init_from_server_pass(_startup, compiled_config)
_startup = worker.delet_extra_optimizes_pass(_startup, _startup = worker.delet_extra_optimizes_pass(_startup,
compiled_config) compiled_config)
# for heter program
if self.role_maker._is_heter_parameter_server_mode:
from paddle.fluid.incubate.fleet.parameter_server.ir import heter_trainer_pass as heter_worker
if self.role_maker._is_heter_worker():
# for heter worker
_main = heter_worker.split_heter_worker_ops_pass(
_main, compiled_config)
else:
# for default worker
_main = heter_worker.split_trainer_ops_pass(_main,
compiled_config)
# for startup change
_startup = heter_worker.delete_startup_useless_ops_var_pass(
_startup, _main, compiled_config)
else: else:
_main = worker.append_send_ops_pass(_main, compiled_config) _main = worker.append_send_ops_pass(_main, compiled_config)
_startup = _startup _startup = _startup
...@@ -129,9 +144,12 @@ class AsyncMetaOptimizer(MetaOptimizerBase): ...@@ -129,9 +144,12 @@ class AsyncMetaOptimizer(MetaOptimizerBase):
_origin_startup_program, _origin_startup_program,
strategy, self.role_maker) strategy, self.role_maker)
main_program, startup_program = \ if self.role_maker.is_worker() or self.role_maker._is_heter_worker():
self._build_trainer_programs(compiled_config) if self.role_maker.is_worker() \ main_program, startup_program = self._build_trainer_programs(
else self._build_pserver_programs(compiled_config) compiled_config)
elif self.role_maker.is_server():
main_program, startup_program = self._build_pserver_programs(
compiled_config)
loss.block.program = main_program loss.block.program = main_program
fluid.framework.switch_startup_program(startup_program) fluid.framework.switch_startup_program(startup_program)
......
...@@ -20,8 +20,6 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO ...@@ -20,8 +20,6 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op from .common import OpRole, OP_ROLE_KEY, OP_ROLE_VAR_KEY, CollectiveHelper, is_update_op, is_loss_grad_op, is_backward_op, is_optimizer_op
__all__ = ["PipelineOptimizer"]
class PipelineHelper(CollectiveHelper): class PipelineHelper(CollectiveHelper):
def __init__(self, role_maker, nrings=1, wait_port='6174'): def __init__(self, role_maker, nrings=1, wait_port='6174'):
......
...@@ -14,8 +14,6 @@ ...@@ -14,8 +14,6 @@
from paddle.fluid.optimizer import RecomputeOptimizer as RO from paddle.fluid.optimizer import RecomputeOptimizer as RO
from .meta_optimizer_base import MetaOptimizerBase from .meta_optimizer_base import MetaOptimizerBase
__all__ = ["RecomputeOptimizer"]
class RecomputeOptimizer(MetaOptimizerBase): class RecomputeOptimizer(MetaOptimizerBase):
def __init__(self, optimizer): def __init__(self, optimizer):
......
...@@ -11,3 +11,16 @@ ...@@ -11,3 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .metric import *
__all__ = [
"sum",
"max",
"min",
"auc",
"mae",
"rmse",
"mse",
"acc",
]
...@@ -14,5 +14,3 @@ ...@@ -14,5 +14,3 @@
from .collective_runtime import CollectiveRuntime from .collective_runtime import CollectiveRuntime
from .parameter_server_runtime import ParameterServerRuntime from .parameter_server_runtime import ParameterServerRuntime
__all__ = ["CollectiveRuntime," "ParameterServerRuntime", ]
...@@ -196,6 +196,18 @@ class ParameterServerRuntime(RuntimeBase): ...@@ -196,6 +196,18 @@ class ParameterServerRuntime(RuntimeBase):
else: else:
warnings.warn("communicator has been initialized, skip") warnings.warn("communicator has been initialized, skip")
def _get_executor(self):
if self.role_maker._is_heter_worker():
if self.role_maker._get_heter_worker_device() == "GPU":
gpu_id = int(os.getenv("FLAGS_selected_gpus", "0"))
executor = Executor(fluid.CUDAPlace(gpu_id))
else:
raise ValueError("Not Support Device {}".format(
self.role_maker._get_heter_worker_device()))
else:
executor = fluid.Executor(fluid.CPUPlace())
return executor
def _init_server(self, *args, **kwargs): def _init_server(self, *args, **kwargs):
if len(args) > 1: if len(args) > 1:
raise ValueError("init server can only accept 1 args: `dirname`") raise ValueError("init server can only accept 1 args: `dirname`")
...@@ -204,9 +216,15 @@ class ParameterServerRuntime(RuntimeBase): ...@@ -204,9 +216,15 @@ class ParameterServerRuntime(RuntimeBase):
else: else:
model_dirname = None model_dirname = None
executor = fluid.Executor(fluid.CPUPlace()) if self.role_maker._is_heter_worker():
self._init_worker()
executor = self._get_executor()
executor.run(fluid.default_startup_program()) executor.run(fluid.default_startup_program())
if self.role_maker._is_heter_worker():
return
if not model_dirname: if not model_dirname:
return return
...@@ -237,12 +255,12 @@ class ParameterServerRuntime(RuntimeBase): ...@@ -237,12 +255,12 @@ class ParameterServerRuntime(RuntimeBase):
# self._load_sparse_params(dirname=model_dir, varnames=distribtued_varnames) # self._load_sparse_params(dirname=model_dir, varnames=distribtued_varnames)
def _run_server(self): def _run_server(self):
executor = fluid.Executor(fluid.CPUPlace()) executor = self._get_executor()
executor.run(fluid.default_main_program()) executor.run(fluid.default_main_program())
def _stop_worker(self): def _stop_worker(self):
self._communicator.stop() self._communicator.stop()
executor = fluid.Executor(fluid.CPUPlace()) executor = self._get_executor()
executor.close() executor.close()
def _get_optimizer_status(self, op, param_name): def _get_optimizer_status(self, op, param_name):
......
...@@ -15,4 +15,4 @@ ...@@ -15,4 +15,4 @@
from .fs import * from .fs import *
from .http_server import KVHandler, KVHTTPServer, KVServer from .http_server import KVHandler, KVHTTPServer, KVServer
__all__ = ['KVHandler', 'KVHTTPServer', 'KVServer'] + fs.__all__ #__all__ = ['KVHandler', 'KVHTTPServer', 'KVServer'] + fs.__all__
...@@ -145,7 +145,7 @@ class Fleet(object): ...@@ -145,7 +145,7 @@ class Fleet(object):
Returns: Returns:
bool: True if this is a node of server, bool: True if this is a node of server,
False if not. False if not
""" """
return self._role_maker.is_server() return self._role_maker.is_server()
......
...@@ -343,7 +343,6 @@ class MPISymetricRoleMaker(MPIRoleMaker): ...@@ -343,7 +343,6 @@ class MPISymetricRoleMaker(MPIRoleMaker):
def get_pserver_endpoints(self): def get_pserver_endpoints(self):
""" """
get pserver endpoints get pserver endpoints
Returns: Returns:
endpoints(list): pserver endpoints endpoints(list): pserver endpoints
""" """
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import warnings
import paddle.fluid.core as core
import paddle.fluid.framework as framework
from paddle.fluid.transpiler.details.program_utils import delete_ops
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_heter_ops
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import create_heter_program
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import create_trainer_program
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_block_joints
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import find_op_input_output
from paddle.fluid.incubate.fleet.parameter_server.ir.trainer_pass import get_vars_name_in_block
def split_heter_worker_ops_pass(program, config):
"""
split heter worker program from origin-program
1. find heter op (located on different device)
2. find input&output of every heter-block
3. create heter worker program, add listen&serv op
"""
default_deveice = "cpu"
program, heter_ops, _, program_block_ops = find_heter_ops(program,
default_deveice)
if len(heter_ops) == 0:
warnings.warn(
"Currently running in Heter Parameter Server mode, but no OP running on heterogeneous devices, Please check your code."
)
return program
current_device = "gpu"
if current_device not in heter_ops:
raise ValueError("Op which run on device {} not exist.".format(
current_device))
block_vars_detail = find_block_joints(program, program_block_ops, heter_ops)
heter_program = framework.Program()
create_heter_program(program, config, heter_program, heter_ops,
block_vars_detail, current_device)
return heter_program
def split_trainer_ops_pass(program, config):
"""
split cpu-trainer program from origin-program
1. find heter op (located on different device)
2. find input&output of every heter-block
3. create cpu-trainer program, add send&recv op
"""
# Todo: support user-defined default_device (MrChengmo)
default_device = "cpu"
program, heter_ops, _, program_block_ops = find_heter_ops(program,
default_device)
block_vars_detail = find_block_joints(program, program_block_ops, heter_ops)
create_trainer_program(program, config, heter_ops, block_vars_detail)
return program
def delete_startup_useless_ops_var_pass(startup_program, main_program, config):
"""
delete variables that are not used in the current main_program
"""
# find all op and its var
vars_in_main_program = get_vars_name_in_block(main_program.global_block())
block_nums = startup_program.num_blocks
for block_index in range(1, block_nums):
current_block = startup_program.block(block_index)
# delete useless op
need_delete_op = []
for op in current_block.ops:
inputs, outputs = find_op_input_output(startup_program,
current_block, op)
inputs += outputs
# Todo: delete some concat op
if not (set(inputs) & set(vars_in_main_program)):
need_delete_op.append(op)
delete_ops(current_block, need_delete_op)
# delete useless var
for var in current_block.vars:
if var.name not in vars_in_main_program:
startup_program._remove_var(var.name)
return startup_program
...@@ -12,33 +12,23 @@ ...@@ -12,33 +12,23 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# Copyright(c) 2020 PaddlePaddle Authors.All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0(the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http: // www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function from __future__ import print_function
from functools import reduce from functools import reduce
import collections import collections
import math import math
import os import os
import warnings
import six import six
import paddle.fluid as fluid
from paddle.fluid import core from paddle.fluid import core
from paddle.fluid.core import CommContext from paddle.fluid.core import CommContext
import paddle.fluid.framework as framework
from paddle.fluid.incubate.fleet.parameter_server.mode import DistributedMode from paddle.fluid.incubate.fleet.parameter_server.mode import DistributedMode
from paddle.fluid.incubate.fleet.parameter_server.ir import vars_metatools from paddle.fluid.incubate.fleet.parameter_server.ir import vars_metatools
from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import RoundRobin, PSDispatcher from paddle.fluid.incubate.fleet.parameter_server.ir.ps_dispatcher import RoundRobin, PSDispatcher
from paddle.fluid.transpiler.details.program_utils import delete_ops
OP_NAME_SCOPE = "op_namescope" OP_NAME_SCOPE = "op_namescope"
CLIP_OP_NAME_SCOPE = "@CLIP" CLIP_OP_NAME_SCOPE = "@CLIP"
...@@ -122,9 +112,20 @@ class MergedVariable: ...@@ -122,9 +112,20 @@ class MergedVariable:
self.offsets = offsets self.offsets = offsets
def Singleton(cls):
_instance = {}
def _singleton(*args, **kwargs):
if cls not in _instance:
_instance[cls] = cls(*args, **kwargs)
return _instance[cls]
return _singleton
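The Singleton helper above caches one instance per decorated class; a quick illustration with a hypothetical class (not from the commit):
# --- illustrative example, not part of the commit ---
@Singleton
class _Config(object):
    def __init__(self, value):
        self.value = value

a = _Config(1)
b = _Config(2)   # constructor arguments are ignored once an instance exists
assert a is b and a.value == 1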
@Singleton
class CompileTimeStrategy(object): class CompileTimeStrategy(object):
def __init__(self, main_program, startup_program, strategy, role_maker): def __init__(self, main_program, startup_program, strategy, role_maker):
self.min_block_size = 8192 self.min_block_size = 8192
self.origin_main_program = main_program self.origin_main_program = main_program
...@@ -177,6 +178,12 @@ class CompileTimeStrategy(object): ...@@ -177,6 +178,12 @@ class CompileTimeStrategy(object):
def get_ps_endpoints(self): def get_ps_endpoints(self):
return self.role_maker.get_pserver_endpoints() return self.role_maker.get_pserver_endpoints()
def get_heter_worker_endpoints(self):
return self.role_maker._get_heter_worker_endpoints()
def get_heter_worker_endpoint(self):
return self.role_maker._get_heter_worker_endpoint()
def get_origin_programs(self): def get_origin_programs(self):
return self.origin_main_program, self.origin_startup_program return self.origin_main_program, self.origin_startup_program
...@@ -810,6 +817,30 @@ class CompileTimeStrategy(object): ...@@ -810,6 +817,30 @@ class CompileTimeStrategy(object):
return sparse_param_grads, dense_param_grads return sparse_param_grads, dense_param_grads
def remove_var_pair_by_grad(self, var_name):
for index, pair in enumerate(self.merged_variables_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_variables_pairs[index]
for index, pair in enumerate(self.merged_dense_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_dense_pairs[index]
return
for index, pair in enumerate(self.merged_sparse_pairs):
var = pair[0]
var_grad = pair[1]
if var_grad.merged_var.name == var_name:
del self.merged_sparse_pairs[index]
return
print("Not find {} in self.merge_pairs".format(var_name))
def _is_opt_role_op(op): def _is_opt_role_op(op):
# NOTE : depend on oprole to find out whether this op is for # NOTE : depend on oprole to find out whether this op is for
......
...@@ -13,7 +13,13 @@ ...@@ -13,7 +13,13 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import six
import collections
import warnings
import math
from functools import reduce
import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
import paddle.fluid.framework as framework import paddle.fluid.framework as framework
...@@ -34,6 +40,10 @@ LR_SCHED_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.LRSched ...@@ -34,6 +40,10 @@ LR_SCHED_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.LRSched
OPT_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.Optimize OPT_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.Optimize
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
DEVICE_LIST = ["cpu", "gpu", "xpu"]
COMMUNICATE_OPS_TYPE = ["send", "recv", "fetch_barrier", "send_barrier"]
DEFAULT_DEVICE = 'cpu'
def delete_optimizer_pass(program, config): def delete_optimizer_pass(program, config):
def _delete_optimizer_op_and_vars(_program, optimize_ops): def _delete_optimizer_op_and_vars(_program, optimize_ops):
...@@ -250,7 +260,7 @@ def fake_init_ops_pass(program, config): ...@@ -250,7 +260,7 @@ def fake_init_ops_pass(program, config):
return list(set(dist_varnames + sparse_varnames)) return list(set(dist_varnames + sparse_varnames))
def _fake_init_sparsetable(sparse_table_names): def _fake_init_sparsetable(sparse_table_names):
#delete table init op # delete table init op
for table_name in sparse_table_names: for table_name in sparse_table_names:
table_var = program.global_block().vars[table_name] table_var = program.global_block().vars[table_name]
table_param_init_op = [] table_param_init_op = []
...@@ -307,3 +317,871 @@ def delet_extra_optimizes_pass(program, config): ...@@ -307,3 +317,871 @@ def delet_extra_optimizes_pass(program, config):
program.global_block()._remove_var(var) program.global_block()._remove_var(var)
return program return program
def find_heter_ops(program, default_device="cpu"):
if default_device not in DEVICE_LIST:
raise ValueError("Given device {} is not in device list {}".format(
default_device, DEVICE_LIST))
def _is_heter_op(op, current_heter_device, default_device="cpu"):
heter_devices = list(DEVICE_LIST)
heter_devices.remove(default_device)
op_device = op.attr("op_device")
op_type = op.type
if op_device in heter_devices:
return True
elif op_type in COMMUNICATE_OPS_TYPE and current_heter_device != default_device:
# for distributed communication ops: send & recv & barrier etc.
# Todo: need update this method
op._set_attr('op_device', current_heter_device)
return True
elif op_device is None or op_device == default_device:
op._set_attr('op_device', default_device)
return False
return False
def _is_same_device(op, pre_device, default_device="cpu"):
op_device = op.attr("op_device")
if op_device == pre_device:
return True
if pre_device == default_device:
return True
return False
def _append_heter_op(op, current_heter_block_ops, heter_ops):
op_device = op.attr("op_device")
if op_device not in heter_ops:
heter_ops[op_device] = {}
current_heter_block_ops.append(op)
origin_program = program.clone()
block = program.global_block()
program_block_ops = []
default_ops = {default_device: {}}
heter_ops = {}
block_index = 0
# heter_ops: {"gpu": {1:[op1, op2, ...], 2:[op1, op2, ...] }; "xpu": {3:[op1, op2, ...], 4:[op1, op2, ...] }}
current_heter_block_ops = []
current_default_block_ops = []
current_heter_device = default_device
is_heter = False
for op in block.ops:
if _is_heter_op(op, current_heter_device, default_device):
# for gpu/xpu-op
is_heter = True
# for cpu-op block append
if len(current_default_block_ops) > 1:
default_ops[default_device][
block_index] = current_default_block_ops
program_block_ops.append(current_default_block_ops)
current_default_block_ops = []
block_index += 1
if _is_same_device(op, current_heter_device, default_device):
# for gpu-op, gpu-op -> gpu-op,...
current_heter_device = op.attr("op_device")
_append_heter_op(op, current_heter_block_ops, heter_ops)
else:
# for gpu-op -> xpu-op, ...
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
block_index += 1
current_heter_block_ops = []
current_heter_device = op.attr("op_device")
_append_heter_op(op, current_heter_block_ops, heter_ops)
elif is_heter:
# for gpu/xpu-op -> cpu-op
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
block_index += 1
current_heter_block_ops = []
current_heter_device = default_device
is_heter = False
current_default_block_ops.append(op)
else:
# for cpu-op
current_default_block_ops.append(op)
if current_default_block_ops != []:
default_ops[default_device][block_index] = current_default_block_ops
program_block_ops.append(current_default_block_ops)
if current_heter_block_ops != []:
op_device = current_heter_block_ops[0].attr("op_device")
heter_ops[op_device][block_index] = current_heter_block_ops
program_block_ops.append(current_heter_block_ops)
if len(heter_ops) == 0:
warnings.warn(
"No heterogeneous OP was found in your program , "
" please using fluid.device_guard() to run OPs on different device.")
total_heter_ops = 0
heter_blocks = 0
for device in heter_ops.keys():
heter_block_dict = heter_ops[device]
heter_blocks += len(heter_block_dict)
for _, heter_block in heter_block_dict.items():
total_heter_ops += len(heter_block)
print(
"There are {} OPs in your main_program, and contains {} heter-OPs which is made up of {} heter-blocks.".
format(len(block.ops), total_heter_ops, heter_blocks))
return origin_program, heter_ops, default_ops, program_block_ops
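find_heter_ops relies on the op_device attribute that fluid.device_guard sets on each op. Below is a hedged sketch of an annotated program this pass would split; the network shape and layer sizes are arbitrary assumptions.
# --- illustrative example, not part of the commit ---
import paddle.fluid as fluid

main = fluid.Program()
startup = fluid.Program()
with fluid.program_guard(main, startup):
    x = fluid.data(name="x", shape=[-1, 13], dtype="float32")
    y = fluid.data(name="y", shape=[-1, 1], dtype="float32")
    with fluid.device_guard("cpu"):            # ops on the default device
        fc1 = fluid.layers.fc(input=x, size=64)
    with fluid.device_guard("gpu"):            # heter block: ops marked for GPU
        fc2 = fluid.layers.fc(input=fc1, size=32)
    with fluid.device_guard("cpu"):
        pred = fluid.layers.fc(input=fc2, size=1)
        loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

# called from the same module as this pass, the split would report one gpu heter-block:
# origin_program, heter_ops, default_ops, program_block_ops = find_heter_ops(main)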
def create_heter_program(program, config, heter_program, heter_ops,
block_var_detail, current_device):
# add heter op
optimizer_block = []
grad_to_block_id = []
send_grad_var_list = []
pre_block_idx = heter_program.num_blocks - 1
for index, heter_block_ops in heter_ops[current_device].items():
heter_block = heter_program._create_block(pre_block_idx)
optimizer_block.append(heter_block)
for _, op in enumerate(heter_block_ops):
block_append_op(heter_program, program, heter_block, op)
# add relate variables
inputs = _get_input_map_from_op(program.global_block().vars, op)
add_vars_by_op_map(inputs, heter_program)
outputs = _get_output_map_from_op(program.global_block().vars, op)
add_vars_by_op_map(outputs, heter_program)
entrance_vars = block_var_detail[index]["entrance"]
add_vars_by_var_list(entrance_vars, program, heter_program)
exit_vars = block_var_detail[index]["exit"]
add_vars_by_var_list(exit_vars, program, heter_program)
comm_info = get_communicate_var_info(program, index, entrance_vars,
exit_vars)
grad_to_block_id.append(comm_info["block_input_var_name"] + ":" + str(
heter_block.idx))
# create slice op
first_op_index = 0
get_type_var_name = comm_info["input_var_reshape_name"][0].split(
".input_reshape@Heter")[0]
get_type_var = heter_program.global_block().vars[get_type_var_name]
insert_recv_slice_op(
heter_program, heter_block, first_op_index,
comm_info["block_input_var_name"],
(-1, sum(comm_info["input_var_reshape_dim"])), get_type_var.dtype,
get_type_var.type, comm_info["input_var_reshape_name"], [
(-1, comm_info["input_var_reshape_dim"][i])
for i in range(len(comm_info["input_var_reshape_dim"]))
])
first_op_index += len(comm_info["input_var_reshape_dim"])
# create reshape op
for i in range(len(comm_info["input_var_reshape_name"])):
var_name = entrance_vars[i]
insert_reshape_op(
heter_program,
heter_block,
first_op_index,
comm_info["input_var_reshape_name"][i],
var_name, )
first_op_index += 1
first_op_index = len(heter_block.ops)
# create send reshape op
for i in range(len(exit_vars)):
insert_reshape_op(heter_program, heter_block, first_op_index,
exit_vars[i],
comm_info["output_var_reshape_name"][i],
[-1, comm_info["output_var_reshape_dim"][i]])
first_op_index += 1
# create send concat op
insert_send_concat_op(heter_program, heter_block, first_op_index,
comm_info["output_var_reshape_name"],
comm_info["block_output_var_name"],
[-1, sum(comm_info["output_var_reshape_dim"])])
check_op_device(heter_block, current_device)
send_grad_var_list = send_grad_var_list + add_heter_send_op(
program, heter_program, heter_block, block_var_detail[index])
# add step counter
send_input_vars = []
dummy_output = []
trainer_id = config.get_role_id()
pserver_endpoints = config.get_ps_endpoints()
optimizer_block[-1].append_op(
type="send",
inputs={"X": send_input_vars},
outputs={"Out": dummy_output},
attrs={
"send_varnames": [STEP_COUNTER],
"merge_add": True,
"use_send_handler": False,
"endpoints": pserver_endpoints
})
    # add info for the listen_and_serv op
attrs = {
"grad_to_block_id": grad_to_block_id,
"sparse_grad_to_param": None,
"lr_decay_block_id": None,
"dense_optimize_blocks": None,
"sparse_optimize_blocks": None,
"optimize_blocks": optimizer_block,
# runtime attribute
"endpoint": config.get_heter_worker_endpoint(),
"pserver_id": config.get_role_id(),
"Fanin": config.get_trainers(),
"distributed_mode": config.get_distributed_mode(),
"rpc_get_thread_num": 12,
"rpc_send_thread_num": 12,
"rpc_prefetch_thread_num": 12
}
# append the listen_and_serv op
heter_program.global_block().append_op(
type="listen_and_serv", inputs={'X': []}, outputs={}, attrs=attrs)
check_heter_compile_time_strategy(program, config, send_grad_var_list)
def check_heter_compile_time_strategy(program, config, send_grad_var_list):
origin_grad_var_list = []
for _, var_grad in config.merged_variables_pairs:
origin_grad_var_list.append(var_grad.merged_var.name)
origin_grad_var_list = list(set(origin_grad_var_list))
send_grad_var_list = list(set(send_grad_var_list))
useless_grad_var_list = list(
set(origin_grad_var_list) - set(send_grad_var_list))
for useless_grad_var in useless_grad_var_list:
config.remove_var_pair_by_grad(useless_grad_var)
def create_trainer_program(program, config, heter_ops, block_var_detail):
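    # For every heter block: strip its ops from the trainer program, replace
    # them with communication ops, and drop send ops that are no longer needed.
    # Finally remove unused vars and pin the remaining ops to the default device.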
for device in heter_ops.keys():
for heter_block_index in sorted(heter_ops[device]):
replace_ops_by_communicate_op(program, config, heter_block_index,
heter_ops[device][heter_block_index],
block_var_detail)
remove_trainer_send_op(program, config, heter_block_index,
block_var_detail)
deleter_trainer_useless_var(program)
check_op_device(program.global_block(), DEFAULT_DEVICE)
def replace_ops_by_communicate_op(program, config, heter_block_index, ops_list,
block_var_detail):
all_op = program.global_block().ops
start_op = ops_list[0]
first_op_idx = -1
for op in all_op:
if is_same_op(op, start_op):
first_op_idx = all_op.index(op)
break
assert first_op_idx != -1
delete_same_ops(program.global_block(), ops_list)
mode = config.get_distributed_mode()
heter_worker_endpoint = config.get_heter_worker_endpoint()
entrance_var = block_var_detail[heter_block_index]["entrance"]
exit_var = block_var_detail[heter_block_index]["exit"]
default_device_comm_info = get_communicate_var_info(
program, heter_block_index - 1,
block_var_detail[heter_block_index - 1]["entrance"],
block_var_detail[heter_block_index - 1]["exit"])
comm_info = get_communicate_var_info(program, heter_block_index,
entrance_var, exit_var)
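    # On the trainer side, the deleted heter ops are replaced by a communication
    # pattern: reshape the entrance vars, concat them into one joint var, call
    # send_and_recv against the heter worker, then slice and reshape the joint
    # result back into the heter block's exit vars.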
# create reshape op
for i in range(len(entrance_var)):
insert_reshape_op(
program,
program.global_block(), first_op_idx, entrance_var[i],
default_device_comm_info["output_var_reshape_name"][i],
[-1, default_device_comm_info["output_var_reshape_dim"][i]])
first_op_idx += 1
# create concat op
insert_send_concat_op(
program,
program.global_block(), first_op_idx,
default_device_comm_info["output_var_reshape_name"],
default_device_comm_info["block_output_var_name"],
[-1, sum(default_device_comm_info["output_var_reshape_dim"])])
first_op_idx += 1
# create send op
send_input_vars = [
program.global_block().vars[default_device_comm_info[
"block_output_var_name"]]
]
get_type_var_name = comm_info["output_var_reshape_name"][0].split(
".output_reshape@Heter")[0]
get_type_var = program.global_block().vars[get_type_var_name]
program.global_block().create_var(
name=comm_info["block_output_var_name"],
shape=(-1, sum(comm_info["output_var_reshape_dim"])),
dtype=get_type_var.dtype,
type=get_type_var.type)
recv_vars = [
program.global_block().vars[comm_info["block_output_var_name"]]
]
program.global_block()._insert_op(
index=first_op_idx,
type="send_and_recv",
inputs={"X": send_input_vars},
outputs={"Out": recv_vars},
attrs={
"send_var_name": default_device_comm_info["block_output_var_name"],
"recv_var_name": comm_info["block_output_var_name"],
"endpoint": heter_worker_endpoint,
"trainer_id": config.get_role_id(),
RPC_OP_ROLE_ATTR_NAME: RPC_OP_ROLE_ATTR_VALUE
})
first_op_idx += 1
# recv
# create slice op
insert_recv_slice_op(
program,
program.global_block(), first_op_idx,
comm_info["block_output_var_name"],
(-1, sum(comm_info["output_var_reshape_dim"])), get_type_var.dtype,
get_type_var.type, comm_info["output_var_reshape_name"], [
(-1, comm_info["output_var_reshape_dim"][i])
for i in range(len(comm_info["output_var_reshape_dim"]))
])
first_op_idx += len(comm_info["output_var_reshape_dim"])
# create reshape op
for i in range(len(comm_info["output_var_reshape_name"])):
var_name = comm_info["output_var_reshape_name"][i].split(
".output_reshape@Heter")[0]
insert_reshape_op(
program,
program.global_block(),
first_op_idx,
comm_info["output_var_reshape_name"][i],
var_name, )
first_op_idx += 1
def remove_trainer_send_op(program, config, heter_block_index,
                           block_var_detail):
    # If the trainer does FF->BP->SEND, it holds the pair: var, var@GRAD.
    # If the trainer only does SEND, it holds a single var: var@GRAD.
    # Delete the send op if the trainer does not hold the pair (var <-> var@GRAD).
    persistables = block_var_detail[heter_block_index]["persistables"]
need_remove_send_op = []
need_remove_grad_var = []
for op in find_send_op(program):
input_list, _ = find_op_input_output(program,
program.global_block(), op)
for var_name in input_list:
origin_var_name = var_name.split("@GRAD")[0]
if origin_var_name in persistables:
need_remove_send_op.append(op)
need_remove_grad_var.append(var_name)
need_remove_send_op = list(set(need_remove_send_op))
delete_ops(program.global_block(), need_remove_send_op)
for grad_var_name in need_remove_grad_var:
config.remove_var_pair_by_grad(grad_var_name)
def add_heter_send_op(program, heter_program, block, block_var_detail):
def _get_send_op_dict():
send_op_dict = {}
send_op_list = find_send_op(program)
for op in send_op_list:
input_list, _ = find_op_input_output(program,
program.global_block(), op)
for var in input_list:
send_op_dict[var] = op
return send_op_dict
send_grad_var_list = []
send_op_dict = _get_send_op_dict()
for persistable_var in block_var_detail["persistables"]:
        # only handle gradient vars whose name ends with @GRAD
if "@GRAD" not in persistable_var:
continue
if "GRAD" != persistable_var.split("@")[-1]:
continue
if persistable_var not in send_op_dict:
continue
block_append_op(program, heter_program, block,
send_op_dict[persistable_var])
send_grad_var_list.append(persistable_var)
return send_grad_var_list
def find_send_op(program):
send_op_list = []
for op in program.global_block().ops:
if op.type == "send":
send_op_list.append(op)
return send_op_list
def get_communicate_var_info(program, block_index, entrance_var_list,
exit_var_list):
input_var_reshape_dim = []
input_var_reshape_name = []
block_input_var_name = "joint_{}_{}@Heter".format(block_index - 1,
block_index)
output_var_reshape_dim = []
output_var_reshape_name = []
block_output_var_name = "joint_{}_{}@Heter".format(block_index,
block_index + 1)
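    # The joint variables are named "joint_<prev>_<cur>@Heter": one carries all
    # entrance vars coming from the previous block, the other carries all exit
    # vars going to the next block.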
entrance_var_list.sort()
exit_var_list.sort()
# input
# Heter_SERVER_BLOCK_index@JOINT_VAR -> slice -> var@Heter_SERVER_BLOCK@INPUT_RESHAPE_VAR -> reshape -> var
for name in entrance_var_list:
var = program.global_block().vars[name]
shape = var.shape
if len(shape) < 2 or shape[0] != -1:
            raise ValueError(
                "Variable {} does not support heter training. Its shape is {}".
                format(name, shape))
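        # shape[0] is the batch dim (-1), so the product over all dims is
        # negative; multiplying by -1 yields the positive flattened size of the
        # non-batch dims, which is used as the reshape/slice width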
recv_var_dim = -1 * reduce(lambda x, y: x * y, shape)
input_var_reshape_dim.append(recv_var_dim)
input_var_reshape_name.append("{}.input_reshape@Heter".format(name))
# output
# var -> reshape -> var@Heter_SERVER_BLOCK@INPUT_RESHAPE_VAR -> concat -> Heter_SERVER_BLOCK_index@JOINT_VAR
for var_name in exit_var_list:
var = program.global_block().vars[var_name]
shape = var.shape
if len(shape) < 2 or shape[0] != -1:
            raise ValueError(
                "Variable {} does not support heter training. Its shape is {}".
                format(var_name, shape))
send_reshape_dim = -1 * reduce(lambda x, y: x * y, shape)
output_var_reshape_dim.append(send_reshape_dim)
output_var_reshape_name.append("{}.output_reshape@Heter".format(
var_name))
info = {
"input_var_reshape_dim": input_var_reshape_dim,
"input_var_reshape_name": input_var_reshape_name,
"block_input_var_name": block_input_var_name,
"output_var_reshape_dim": output_var_reshape_dim,
"output_var_reshape_name": output_var_reshape_name,
"block_output_var_name": block_output_var_name
}
return info
def find_block_joints(program, program_block_ops_list, heter_ops):
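    # First collect each block's entrance/exit/private/persistable vars, then
    # patch entrances across block boundaries and prune exits nobody consumes.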
block_var_detail = find_entrance_exit_private(program,
program_block_ops_list)
block_var_detail = entrance_exit_check(program, program_block_ops_list,
block_var_detail, heter_ops)
block_var_detail = delete_block_useless_exit(
program, program_block_ops_list, block_var_detail)
return block_var_detail
def find_entrance_exit_private(program, program_block_ops_list):
block_var_detail = []
persistables = []
for index, block_op_list in enumerate(program_block_ops_list):
block_input, block_output = find_ops_list_input_output(program,
block_op_list)
persistables = screen_persistables(
program, block_input) + screen_persistables(program, block_output)
# find entrance & exit
block_private_vars = list(set(block_input) & set(block_output))
block_entrance = list(set(block_input) - set(block_private_vars))
block_exit = list(set(block_output) - set(block_private_vars))
detail = {
"entrance": block_entrance,
"exit": block_exit,
"private": block_private_vars,
"persistables": persistables
}
block_var_detail.append(detail)
return block_var_detail
def entrance_exit_check(program, program_block_ops_list, block_var_detail,
heter_ops):
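    # Walk blocks from back to front: any entrance var of block i that the
    # previous block does not already expose is appended to the previous
    # block's entrance and exit lists, so it gets forwarded along the chain.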
for index in range(len(block_var_detail) - 1, -1, -1):
if index - 1 < 0:
break
previous_block_exit = block_var_detail[index - 1]["exit"]
previous_block_exit.sort()
current_block_entrance = block_var_detail[index]["entrance"]
current_block_entrance.sort()
if previous_block_exit == current_block_entrance:
continue
exist_vars = list(
set(previous_block_exit) & set(current_block_entrance))
need_add_vars = list(set(current_block_entrance) - set(exist_vars))
need_add_vars = find_need_var_from_previous_block(
need_add_vars, block_var_detail, index, heter_ops)
previous_block_private = block_var_detail[index - 1]["private"]
previous_block_entrance = block_var_detail[index - 1]["entrance"]
for var in need_add_vars:
if var not in previous_block_private and var not in previous_block_entrance:
previous_block_entrance.append(var)
previous_block_exit.append(var)
return block_var_detail
def find_need_var_from_previous_block(need_add_vars, block_var_detail,
current_index, heter_ops):
# create index_device_map
index_device_map = {}
for index in range(len(block_var_detail)):
index_device_map[index] = DEFAULT_DEVICE
for device in heter_ops:
for index in heter_ops[device].keys():
index_device_map[index] = device
pre_index = current_index - 1
need_ignore_var = []
    # if the needed var is already available on the current (default) device,
    # no communication is required
for var in need_add_vars:
while (pre_index >= 0):
previous_block_private = block_var_detail[pre_index]["private"]
previous_block_exit = block_var_detail[pre_index]["exit"]
previous_block_entrance = block_var_detail[pre_index]["entrance"]
total_var = previous_block_private + previous_block_exit + previous_block_entrance
if var in total_var:
if index_device_map[current_index] == index_device_map[
pre_index] and index_device_map[
current_index] == DEFAULT_DEVICE:
need_ignore_var.append(var)
break
pre_index -= 1
need_add_vars = list(set(need_add_vars).difference(set(need_ignore_var)))
return need_add_vars
def delete_block_useless_exit(program, program_block_ops_list,
block_var_detail):
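    # an exit var that the next block never consumes does not need to be sent,
    # so it is dropped from the current block's exit list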
for index in range(len(block_var_detail)):
if index == len(block_var_detail) - 1:
break
current_block_exit = block_var_detail[index]["exit"]
next_block_entrance = block_var_detail[index + 1]["entrance"]
need_delete_var = []
for var in current_block_exit:
if var not in next_block_entrance:
need_delete_var.append(var)
for var in need_delete_var:
current_block_exit.remove(var)
return block_var_detail
def check_op_device(block, device):
for op in block.ops:
op._set_attr('op_device', device)
def screen_persistables(program, var_list):
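    # Persistable vars (e.g. parameters) are stripped from var_list and
    # returned; for @GRAD names, persistence is checked on the forward var.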
need_remove = []
for var_name in var_list:
if "@GRAD" in var_name:
origin_var_name = var_name.split("@GRAD")[0]
var = program.global_block().vars[origin_var_name]
else:
var = program.global_block().vars[var_name]
if fluid.io.is_persistable(var):
need_remove.append(var_name)
for var_name in need_remove:
var_list.remove(var_name)
return need_remove
def insert_reshape_op(program,
block,
index,
var_name,
new_var_name,
new_var_shape=None):
input_var = program.global_block().vars[var_name]
if new_var_name not in program.global_block().vars:
out = program.global_block().create_var(
name=new_var_name,
shape=new_var_shape,
dtype=input_var.dtype,
type=input_var.type)
else:
out = program.global_block().vars[new_var_name]
new_var_shape = out.shape
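    # reshape2 also emits an XShape output, so create a scratch var for it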
x_shape = program.global_block().create_var(
name="{}.xshape@Heter".format(var_name), dtype=input_var.dtype)
block._insert_op(
index=index,
type="reshape2",
inputs={"X": input_var},
attrs={'shape': new_var_shape},
outputs={"Out": out,
"XShape": x_shape})
def insert_send_concat_op(program, block, index, var_name_list, new_var_name,
new_var_shape):
input_var_list = [
program.global_block().vars[var_name] for var_name in var_name_list
]
out = program.global_block().create_var(
name=new_var_name,
shape=new_var_shape,
dtype=input_var_list[0].dtype,
type=input_var_list[0].type)
block._insert_op(
index=index,
type='concat',
inputs={"X": input_var_list},
outputs={'Out': [out]},
attrs={'axis': -1,
'use_stack': False})
def insert_recv_slice_op(program, block, index, var_name, var_shape, dtype,
type, new_var_name_list, new_var_shape_list):
if var_name not in program.global_block().vars:
input_var = program.global_block().create_var(
name=var_name, shape=var_shape, dtype=dtype, type=type)
else:
input_var = program.global_block().vars[var_name]
out_list = []
for i in range(len(new_var_name_list)):
if new_var_name_list[i] not in program.global_block().vars:
out = program.global_block().create_var(
name=new_var_name_list[i],
shape=new_var_shape_list[i],
dtype=input_var.dtype,
type=input_var.type)
else:
out = program.global_block().vars[new_var_name_list[i]]
out_list.append(out)
start_index = 0
end_index = 0
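    # slice the joint var along axis 1 into consecutive segments, using
    # cumulative [start, end) offsets derived from each target shape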
for i in range(len(new_var_name_list)):
starts = []
ends = []
attrs = {'axes': [1]}
end_index += new_var_shape_list[i][1]
starts.append(start_index)
ends.append(end_index)
attrs['starts'] = starts
attrs['ends'] = ends
block._insert_op(
index=index,
type='slice',
inputs={'Input': input_var},
attrs=attrs,
outputs={'Out': out_list[i]})
start_index = end_index
index += 1
def deleter_trainer_useless_var(program):
    program_useful_var_list = []
    for op in program.global_block().ops:
        input_var_list, output_var_list = find_op_input_output(
            program, program.global_block(), op)
        op_var_list = list(set(input_var_list).union(set(output_var_list)))
        program_useful_var_list = list(
            set(program_useful_var_list).union(set(op_var_list)))
    program_useless_var_list = list(
        set(get_vars_name_in_block(program.global_block())).difference(
            set(program_useful_var_list)))
for var in program_useless_var_list:
program.global_block()._remove_var(var)
return program_useless_var_list
def block_append_op(program, origin_program, block, op):
inputs = _get_input_map_from_op(origin_program.global_block().vars, op)
for key, varlist in six.iteritems(inputs):
if not isinstance(varlist, list):
varlist = [varlist]
for var in varlist:
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
outputs = _get_output_map_from_op(origin_program.global_block().vars, op)
for key, varlist in six.iteritems(outputs):
if not isinstance(varlist, list):
varlist = [varlist]
for var in varlist:
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
if "_grad" not in op.type:
# for forward op
return block.append_op(
type=op.type, inputs=inputs, outputs=outputs, attrs=op.all_attrs())
else:
# for grad op
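        # copy the op desc so every attribute survives, then re-tag the op with
        # the Backward role and the original op_device attribute (if any)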
op_desc = op.desc
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
backward = core.op_proto_and_checker_maker.OpRole.Backward
device_attr_name = core.op_proto_and_checker_maker.kOpDeviceAttrName()
# append grad op
new_op_desc = block.desc.append_op()
new_op_desc.copy_from(op_desc)
new_op_desc._set_attr(op_role_attr_name, backward)
        # set device guard
if op.desc.has_attr(device_attr_name):
op_device = op_desc.attr(device_attr_name)
new_op_desc._set_attr(device_attr_name, op_device)
block._sync_with_cpp()
def add_vars_by_op_map(var_map, program):
for key, varlist in six.iteritems(var_map):
if not isinstance(varlist, list):
varlist = [varlist]
for i in range(len(varlist)):
var = varlist[i]
if var.name not in program.global_block().vars:
program.global_block()._clone_variable(var)
def add_vars_by_var_list(var_name_list, origin_program, program):
for var_name in var_name_list:
if var_name not in program.global_block().vars:
var = origin_program.global_block().vars[var_name]
program.global_block()._clone_variable(var)
def get_varlist_from_op_map(var_map):
var_list = []
for key, varlist in six.iteritems(var_map):
if not isinstance(varlist, list):
varlist = [varlist]
for i in range(len(varlist)):
var = varlist[i]
var_list.append(var.name)
return var_list
def find_ops_list_input_output(program, ops_list):
input_var_list = []
output_var_list = []
for op in ops_list:
inputs = _get_input_map_from_op(program.global_block().vars, op)
input_var_list += get_varlist_from_op_map(inputs)
outputs = _get_output_map_from_op(program.global_block().vars, op)
output_var_list += get_varlist_from_op_map(outputs)
input_var_list = list(set(input_var_list))
output_var_list = list(set(output_var_list))
return input_var_list, output_var_list
def find_op_input_output(program, block, op):
input_var_list = []
output_var_list = []
inputs = _get_input_map_from_op(block.vars, op)
input_var_list += get_varlist_from_op_map(inputs)
outputs = _get_output_map_from_op(block.vars, op)
output_var_list += get_varlist_from_op_map(outputs)
input_var_list = list(set(input_var_list))
output_var_list = list(set(output_var_list))
return input_var_list, output_var_list
def get_vars_name_in_block(block):
vars_list = block.vars.keys()
vars_name_list = [var_name for var_name in vars_list]
return vars_name_list
def is_same_op(op1, op2):
if str(op1) != str(op2):
return False
return True
def _get_input_map_from_op(varmap, op):
"""Returns a dict from op input name to the vars in varmap."""
iomap = collections.OrderedDict()
for key in op.input_names:
vars = []
for varname in op.input(key):
if varname == "@EMPTY@":
continue
if "lod_tensor_blocking_queue" in varname:
continue
vars.append(varmap[varname])
if len(vars) == 1:
iomap[key] = vars[0]
else:
iomap[key] = vars
return iomap
def _get_output_map_from_op(varmap, op):
"""Returns a dict from op output name to the vars in varmap."""
iomap = collections.OrderedDict()
for key in op.output_names:
vars = []
for varname in op.output(key):
if varname == "@EMPTY@":
continue
if "lod_tensor_blocking_queue" in varname:
continue
vars.append(varmap[varname])
if len(vars) == 1:
iomap[key] = vars[0]
else:
iomap[key] = vars
return iomap
def delete_same_ops(block, ops):
for op in ops:
try:
for origin_op in block.ops:
if is_same_op(origin_op, op):
idx = list(block.ops).index(origin_op)
block._remove_op(idx)
break
except Exception as e:
print(e)
...@@ -1858,6 +1858,7 @@ def conv3d(input,
    return helper.append_activation(pre_act)
@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool2d")
@templatedoc()
def pool2d(input,
           pool_size=-1,
...@@ -2075,6 +2076,7 @@ def pool2d(input,
    return pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.pool3d")
@templatedoc()
def pool3d(input,
           pool_size=-1,
...@@ -2303,6 +2305,7 @@ def pool3d(input,
    return pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool2d")
@templatedoc(op_type="pool2d")
def adaptive_pool2d(input,
                    pool_size,
...@@ -2450,6 +2453,7 @@ def adaptive_pool2d(input,
    return (pool_out, mask) if require_index else pool_out
@deprecated(since="2.0.0", update_to="paddle.nn.functional.adaptive_pool3d")
@templatedoc(op_type="pool3d")
def adaptive_pool3d(input,
                    pool_size,
...@@ -10205,6 +10209,7 @@ def unstack(x, axis=0, num=None):
    return outs
@deprecated(since='2.0.0', update_to="paddle.expand")
def expand(x, expand_times, name=None):
    """
    :alias_main: paddle.expand
...@@ -10312,6 +10317,7 @@ def expand(x, expand_times, name=None):
    return out
@deprecated(since='2.0.0', update_to="paddle.expand_as")
def expand_as(x, target_tensor, name=None):
    """
    :alias_main: paddle.expand_as
...@@ -10377,6 +10383,9 @@ def expand_as(x, target_tensor, name=None):
    #(3,20)
    """
    if in_dygraph_mode():
        return core.ops.expand_as(x, target_tensor)
    check_variable_and_dtype(
        x, 'x', ['float32', 'float64', 'int32', 'int64', 'bool'], 'expand_as')
    check_variable_and_dtype(target_tensor, 'target_tensor',
...@@ -15004,6 +15013,7 @@ def gather_tree(ids, parents):
    return out
@deprecated(since="2.0.0", update_to="paddle.uniform")
@templatedoc()
def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0,
                   name=None):
...
...@@ -17,8 +17,9 @@ from __future__ import print_function
import os
import logging
import tarfile
import tempfile
import random
import warnings
import paddle
import paddle.fluid.incubate.data_generator as data_generator
...@@ -57,7 +58,7 @@ def load_dnn_input_record(sent):
def load_lr_input_record(sent):
    res = []
    for _ in [x.split(':') for x in sent.split()]:
        res.append(int(_[0]) % 10000)
    return res
...@@ -120,9 +121,62 @@ def prepare_data():
    lr_input_dim = res[1]
    logger.info('dnn input dim: %d' % dnn_input_dim)
    logger.info('lr input dim: %d' % lr_input_dim)
    return dnn_input_dim, lr_input_dim, train_file_path
def gen_fake_line(dnn_data_num=7,
dnn_data_range=1e5,
lr_data_num=5,
lr_data_range=1e5):
line = ""
# for deep data
for index in range(dnn_data_num):
data = str(random.randint(0, dnn_data_range - 1))
if index < dnn_data_num - 1:
data += " "
line += data
line += "\t"
# for wide data
for index in range(lr_data_num):
data = str(random.randint(0, lr_data_range - 1)) + ":" + str(1)
if index < lr_data_num - 1:
data += " "
line += data
line += "\t"
# for label
line += str(random.randint(0, 1))
line += "\n"
return line
def prepare_fake_data(file_nums=8, file_lines=1000):
"""
    Create fake data in the same format as avazu_ctr_data
"""
file_dir = tempfile.mkdtemp()
warnings.warn("Fake data write in {}".format(file_dir))
for file_index in range(file_nums):
with open(
os.path.join(file_dir,
"ctr_train_data_part_{}".format(file_index)),
'w+') as fin:
file_str = ""
for line_index in range(file_lines):
file_str += gen_fake_line()
fin.write(file_str)
warnings.warn("Write done ctr_train_data_part_{}".format(
file_index))
file_list = [os.path.join(file_dir, x) for x in os.listdir(file_dir)]
assert len(file_list) == file_nums
return file_list
if __name__ == "__main__":
    pairwise_reader = DatasetCtrReader()
    pairwise_reader.run_from_stdin()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Distribute CTR model for test fleet api
"""
from __future__ import print_function
import shutil
import tempfile
import time
import paddle
import paddle.fluid as fluid
import os
import numpy as np
import ctr_dataset_reader
from test_dist_fleet_heter_base import runtime_main, FleetDistHeterRunnerBase
from dist_fleet_ctr import TestDistCTR2x2, fake_ctr_reader
from paddle.distributed.fleet.base.util_factory import fleet_util
# Fix seed for test
fluid.default_startup_program().random_seed = 1
fluid.default_main_program().random_seed = 1
class TestHeterPsCTR2x2(FleetDistHeterRunnerBase):
"""
    Test the CTR model using the Fleet API
"""
def net(self, args, batch_size=4, lr=0.01):
"""
network definition
Args:
batch_size(int): the size of mini-batch for training
lr(float): learning rate of training
Returns:
avg_cost: LoDTensor of cost.
"""
dnn_input_dim, lr_input_dim = int(1e5), int(1e5)
dnn_data = fluid.layers.data(
name="dnn_data",
shape=[-1, 1],
dtype="int64",
lod_level=1,
append_batch_size=False)
lr_data = fluid.layers.data(
name="lr_data",
shape=[-1, 1],
dtype="int64",
lod_level=1,
append_batch_size=False)
label = fluid.layers.data(
name="click",
shape=[-1, 1],
dtype="float32",
lod_level=0,
append_batch_size=False)
datas = [dnn_data, lr_data, label]
if args.reader == "pyreader":
self.reader = fluid.io.PyReader(
feed_list=datas,
capacity=64,
iterable=False,
use_double_buffer=False)
# build dnn model
dnn_layer_dims = [128, 64, 32, 1]
dnn_embedding = fluid.layers.embedding(
is_distributed=False,
input=dnn_data,
size=[dnn_input_dim, dnn_layer_dims[0]],
param_attr=fluid.ParamAttr(
name="deep_embedding",
initializer=fluid.initializer.Constant(value=0.01)),
is_sparse=True)
dnn_pool = fluid.layers.sequence_pool(
input=dnn_embedding, pool_type="sum")
dnn_out = dnn_pool
# build lr model
        lr_embedding = fluid.layers.embedding(
is_distributed=False,
input=lr_data,
size=[lr_input_dim, 1],
param_attr=fluid.ParamAttr(
name="wide_embedding",
initializer=fluid.initializer.Constant(value=0.01)),
is_sparse=True)
        lr_pool = fluid.layers.sequence_pool(input=lr_embedding, pool_type="sum")
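        # ops built under device_guard("gpu") carry op_device="gpu" and are the
        # ones split out to the heter worker by the heter trainer pass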
with fluid.device_guard("gpu"):
for i, dim in enumerate(dnn_layer_dims[1:]):
fc = fluid.layers.fc(
input=dnn_out,
size=dim,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(value=0.01)),
name='dnn-fc-%d' % i)
dnn_out = fc
merge_layer = fluid.layers.concat(input=[dnn_out, lr_pool], axis=1)
label = fluid.layers.cast(label, dtype="int64")
predict = fluid.layers.fc(input=merge_layer, size=2, act='softmax')
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
fluid.layers.Print(avg_cost, message="avg_cost")
self.feeds = datas
self.train_file_path = ["fake1", "fake2"]
self.avg_cost = avg_cost
self.predict = predict
return avg_cost
def check_model_right(self, dirname):
model_filename = os.path.join(dirname, "__model__")
with open(model_filename, "rb") as f:
program_desc_str = f.read()
program = fluid.Program.parse_from_string(program_desc_str)
with open(os.path.join(dirname, "__model__.proto"), "w") as wn:
wn.write(str(program))
def do_pyreader_training(self, fleet):
"""
        do training using py_reader, feeding data via a sample list generator
Args:
fleet(Fleet api): the fleet object of Parameter Server, define distribute training role
"""
exe = fluid.Executor(fluid.CPUPlace())
fleet.init_worker()
exe.run(fluid.default_startup_program())
batch_size = 4
train_reader = paddle.batch(fake_ctr_reader(), batch_size=batch_size)
self.reader.decorate_sample_list_generator(train_reader)
for epoch_id in range(1):
self.reader.start()
try:
pass_start = time.time()
while True:
exe.run(program=fluid.default_main_program())
pass_time = time.time() - pass_start
except fluid.core.EOFException:
self.reader.reset()
fleet.stop_worker()
def do_dataset_training(self, fleet):
train_file_list = ctr_dataset_reader.prepare_fake_data()
exe = fluid.Executor(fluid.CPUPlace())
fleet.init_worker()
exe.run(fluid.default_startup_program())
thread_num = 1
batch_size = 128
filelist = fleet_util.get_file_shard(train_file_list)
print("filelist: {}".format(filelist))
# config dataset
dataset = paddle.distributed.fleet.DatasetFactory().create_dataset()
dataset.set_batch_size(batch_size)
dataset.set_use_var(self.feeds)
pipe_command = 'python ctr_dataset_reader.py'
dataset.set_pipe_command(pipe_command)
dataset.set_filelist(filelist)
dataset.set_thread(thread_num)
for epoch_id in range(1):
pass_start = time.time()
dataset.set_filelist(filelist)
exe.train_from_dataset(
program=fluid.default_main_program(),
dataset=dataset,
fetch_list=[self.avg_cost],
fetch_info=["cost"],
print_period=2,
debug=int(os.getenv("Debug", "0")))
pass_time = time.time() - pass_start
print("do_dataset_training done. using time {}".format(pass_time))
if os.getenv("SAVE_MODEL") == "1":
model_dir = tempfile.mkdtemp()
fleet.save_inference_model(exe, model_dir,
[feed.name for feed in self.feeds],
self.avg_cost)
self.check_model_right(model_dir)
shutil.rmtree(model_dir)
fleet.stop_worker()
print("do_dataset_training stop worker.")
if __name__ == "__main__":
runtime_main(TestHeterPsCTR2x2)
...@@ -17,6 +17,8 @@ from __future__ import print_function
import unittest
import numpy as np
from op_test import OpTest
import paddle
import paddle.fluid as fluid
class TestAdadeltaOp1(OpTest):
...@@ -108,5 +110,54 @@ class TestAdadeltaOp2(OpTest):
        self.check_output()
class TestAdadeltaV2(unittest.TestCase):
def test_adadelta_dygraph(self):
paddle.disable_static(paddle.CPUPlace())
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
# This can be any optimizer supported by dygraph.
adam = paddle.optimizer.Adadelta(
learning_rate=0.01,
parameters=linear.parameters(),
weight_decay=0.01)
out = linear(a)
out.backward()
adam.step()
adam.clear_gradients()
def test_adadelta(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
rms_optimizer = paddle.optimizer.Adadelta(learning_rate=0.1)
rms_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(ValueError, paddle.optimizer.Adadelta, None)
self.assertRaises(
ValueError, paddle.optimizer.Adadelta, learning_rate=0.1, rho=None)
self.assertRaises(
ValueError,
paddle.optimizer.Adadelta,
learning_rate=0.1,
epsilon=None)
if __name__ == "__main__":
    unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard
import paddle
import paddle.nn.functional as F
import paddle.fluid as fluid
def adaptive_start_index(index, input_size, output_size):
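    # left edge of the index-th adaptive pooling window: floor(index * L_in / L_out)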
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def avg_pool1D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=False,
adaptive=False,
data_type=np.float64):
N, C, L = x.shape
if global_pool == 1:
ksize = [L]
if adaptive:
L_out = ksize[0]
else:
L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
L - ksize[0] + 2 * paddings[0]) // strides[0] + 1
out = np.zeros((N, C, L_out))
for i in range(L_out):
if adaptive:
r_start = adaptive_start_index(i, L, ksize[0])
r_end = adaptive_end_index(i, L, ksize[0])
else:
r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L))
x_masked = x[:, :, r_start:r_end]
field_size = (r_end - r_start) \
if (exclusive or adaptive) else (ksize[0])
        if data_type == np.int8 or data_type == np.uint8:
            out[:, :, i] = (np.rint(
                np.sum(x_masked, axis=2) / field_size)).astype(data_type)
else:
out[:, :, i] = (np.sum(x_masked, axis=(2)) /
field_size).astype(data_type)
return out
class TestPool1d_API(unittest.TestCase):
def setUp(self):
np.random.seed(123)
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_adaptive_avg_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_avg_pool1d(input, output_size=16)
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_avg_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_avg_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def test_adaptive_avg_pool1d(self):
for place in self.places:
self.check_adaptive_avg_dygraph_results(place)
self.check_adaptive_avg_static_results(place)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import unittest
from op_test import OpTest
import paddle.fluid.core as core
from paddle.fluid import compiler, Program, program_guard
import paddle
import paddle.nn.functional as F
import paddle.fluid as fluid
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def max_pool1D_forward_naive(x,
ksize,
strides,
paddings,
global_pool=0,
ceil_mode=False,
exclusive=False,
adaptive=False,
data_type=np.float64):
N, C, L = x.shape
if global_pool == 1:
ksize = [L]
if adaptive:
L_out = ksize[0]
else:
L_out = (L - ksize[0] + 2 * paddings[0] + strides[0] - 1
) // strides[0] + 1 if ceil_mode else (
L - ksize[0] + 2 * paddings[0]) // strides[0] + 1
out = np.zeros((N, C, L_out))
for i in range(L_out):
if adaptive:
r_start = adaptive_start_index(i, L, ksize[0])
r_end = adaptive_end_index(i, L, ksize[0])
else:
r_start = np.max((i * strides[0] - paddings[0], 0))
r_end = np.min((i * strides[0] + ksize[0] - paddings[0], L))
x_masked = x[:, :, r_start:r_end]
out[:, :, i] = np.max(x_masked, axis=(2))
return out
class TestPool1d_API(unittest.TestCase):
def setUp(self):
np.random.seed(123)
self.places = [fluid.CPUPlace()]
if core.is_compiled_with_cuda():
self.places.append(fluid.CUDAPlace(0))
def check_adaptive_max_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_max_pool1d(input, output_size=16)
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_max_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def test_adaptive_max_pool1d(self):
for place in self.places:
self.check_adaptive_max_dygraph_results(place)
self.check_adaptive_max_static_results(place)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import division
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def adaptive_pool2d_forward(x, output_size, data_format='NCHW',
pool_type="max"):
N = x.shape[0]
C, H, W = [x.shape[1], x.shape[2], x.shape[3]] if data_format == 'NCHW' \
else [x.shape[3], x.shape[1], x.shape[2]]
if (isinstance(output_size, int) or output_size == None):
H_out = output_size
W_out = output_size
output_size = [H_out, W_out]
else:
H_out, W_out = output_size
if output_size[0] == None:
output_size[0] = H
H_out = H
if output_size[1] == None:
output_size[1] = W
W_out = W
out = np.zeros((N, C, H_out, W_out)) if data_format=='NCHW' \
else np.zeros((N, H_out, W_out, C))
for i in range(H_out):
in_h_start = adaptive_start_index(i, H, output_size[0])
in_h_end = adaptive_end_index(i, H, output_size[0])
for j in range(W_out):
in_w_start = adaptive_start_index(j, W, output_size[1])
in_w_end = adaptive_end_index(j, W, output_size[1])
if data_format == 'NCHW':
x_masked = x[:, :, in_h_start:in_h_end, in_w_start:in_w_end]
if pool_type == 'avg':
field_size = (
(in_h_end - in_h_start) * (in_w_end - in_w_start))
out[:, :, i, j] = np.sum(x_masked, axis=(2, 3)) / field_size
elif pool_type == 'max':
out[:, :, i, j] = np.max(x_masked, axis=(2, 3))
elif data_format == 'NHWC':
x_masked = x[:, in_h_start:in_h_end, in_w_start:in_w_end, :]
if pool_type == 'avg':
field_size = (
(in_h_end - in_h_start) * (in_w_end - in_w_start))
out[:, i, j, :] = np.sum(x_masked, axis=(1, 2)) / field_size
elif pool_type == 'max':
out[:, i, j, :] = np.max(x_masked, axis=(1, 2))
return out
class TestAdaptiveMaxPool2dAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[3, 3], pool_type="max")
self.res_2_np = adaptive_pool2d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[2, 5], pool_type="max")
"""
self.res_4_np = adaptive_pool2d_forward(
x=self.x_np,
output_size=[3, 3],
pool_type="max",
data_format="NHWC")
"""
self.res_5_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[None, 3], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32")
out_1 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[2, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool2d(
# x=x, output_size=[3, 3], data_format="NHWC")
out_5 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[None, 3])
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool2d(
x=x, return_indices=False, output_size=[3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool2d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[2, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool2d(
# x=x, output_size=[3, 3], data_format="NHWC")
out_5 = paddle.nn.functional.adaptive_max_pool2d(
x=x, output_size=[None, 3])
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
class TestAdaptiveMaxPool2dClassAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[3, 3], pool_type="max")
self.res_2_np = adaptive_pool2d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[2, 5], pool_type="max")
#self.res_4_np = adaptive_pool2d_forward(
# x=self.x_np,
# output_size=[3, 3],
# pool_type="max",
# data_format="NHWC")
self.res_5_np = adaptive_pool2d_forward(
x=self.x_np, output_size=[None, 3], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 7, 7], dtype="float32")
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
# output_size=[3, 3], data_format="NHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
output_size=[None, 3])
out_5 = adaptive_max_pool(x=x)
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=[2, 5])
out_3 = adaptive_max_pool(x=x)
#adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
# output_size=[3, 3], data_format="NHWC")
#out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(
output_size=[None, 3])
out_5 = adaptive_max_pool(x=x)
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
if __name__ == '__main__':
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from __future__ import division
import unittest
import numpy as np
import paddle.fluid.core as core
from op_test import OpTest
import paddle
import paddle.fluid as fluid
from paddle.fluid import Program, program_guard
def adaptive_start_index(index, input_size, output_size):
return int(np.floor(index * input_size / output_size))
def adaptive_end_index(index, input_size, output_size):
return int(np.ceil((index + 1) * input_size / output_size))
def adaptive_pool3d_forward(x,
output_size,
adaptive=True,
data_format='NCDHW',
pool_type='max'):
N = x.shape[0]
C, D, H, W = [x.shape[1], x.shape[2], x.shape[3], x.shape[4]] \
if data_format == 'NCDHW' else [x.shape[4], x.shape[1], x.shape[2],x.shape[3]]
if (isinstance(output_size, int) or output_size == None):
H_out = output_size
W_out = output_size
D_out = output_size
output_size = [D_out, H_out, W_out]
else:
D_out, H_out, W_out = output_size
if output_size[0] == None:
output_size[0] = D
D_out = D
if output_size[1] == None:
output_size[1] = H
H_out = H
if output_size[2] == None:
output_size[2] = W
W_out = W
out = np.zeros((N, C, D_out, H_out, W_out)) if data_format=='NCDHW' \
else np.zeros((N, D_out, H_out, W_out, C))
for k in range(D_out):
d_start = adaptive_start_index(k, D, output_size[0])
d_end = adaptive_end_index(k, D, output_size[0])
for i in range(H_out):
h_start = adaptive_start_index(i, H, output_size[1])
h_end = adaptive_end_index(i, H, output_size[1])
for j in range(W_out):
w_start = adaptive_start_index(j, W, output_size[2])
w_end = adaptive_end_index(j, W, output_size[2])
if data_format == 'NCDHW':
x_masked = x[:, :, d_start:d_end, h_start:h_end, w_start:
w_end]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (
w_end - w_start)
out[:, :, k, i, j] = np.sum(x_masked,
axis=(2, 3, 4)) / field_size
elif pool_type == 'max':
out[:, :, k, i, j] = np.max(x_masked, axis=(2, 3, 4))
elif data_format == 'NDHWC':
x_masked = x[:, d_start:d_end, h_start:h_end, w_start:
w_end, :]
if pool_type == 'avg':
field_size = (d_end - d_start) * (h_end - h_start) * (
w_end - w_start)
out[:, k, i, j, :] = np.sum(x_masked,
axis=(1, 2, 3)) / field_size
elif pool_type == 'max':
out[:, k, i, j, :] = np.max(x_masked, axis=(1, 2, 3))
return out
class TestAdaptiveMaxPool3dAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[3, 3, 3], pool_type="max")
self.res_2_np = adaptive_pool3d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[2, 3, 5], pool_type="max")
self.res_4_np = adaptive_pool3d_forward(
x=self.x_np,
output_size=[3, 3, 3],
pool_type="max",
data_format="NDHWC")
self.res_5_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[None, 3, None], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32")
out_1 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[3, 3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[2, 3, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool3d(
# x=x, output_size=[3, 3, 3], data_format="NDHWC")
out_5 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[None, 3, None])
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
#assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
out_1 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[3, 3, 3])
out_2 = paddle.nn.functional.adaptive_max_pool3d(x=x, output_size=5)
out_3 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[2, 3, 5])
#out_4 = paddle.nn.functional.adaptive_max_pool3d(
# x=x, output_size=[3, 3, 3], data_format="NDHWC")
out_5 = paddle.nn.functional.adaptive_max_pool3d(
x=x, output_size=[None, 3, None])
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
#assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
class TestAdaptiveMaxPool3dClassAPI(unittest.TestCase):
def setUp(self):
self.x_np = np.random.random([2, 3, 5, 7, 7]).astype("float32")
self.res_1_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[3, 3, 3], pool_type="max")
self.res_2_np = adaptive_pool3d_forward(
x=self.x_np, output_size=5, pool_type="max")
self.res_3_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[2, 3, 5], pool_type="max")
# self.res_4_np = adaptive_pool3d_forward(
# x=self.x_np,
# output_size=[3, 3, 3],
# pool_type="max",
# data_format="NDHWC")
self.res_5_np = adaptive_pool3d_forward(
x=self.x_np, output_size=[None, 3, None], pool_type="max")
def test_static_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.enable_static()
x = paddle.data(name="x", shape=[2, 3, 5, 7, 7], dtype="float32")
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[3, 3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[2, 3, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
# output_size=[3, 3, 3], data_format="NDHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[None, 3, None])
out_5 = adaptive_max_pool(x=x)
exe = paddle.static.Executor(place=place)
[res_1, res_2, res_3, res_5] = exe.run(
fluid.default_main_program(),
feed={"x": self.x_np},
fetch_list=[out_1, out_2, out_3, out_5])
assert np.allclose(res_1, self.res_1_np)
assert np.allclose(res_2, self.res_2_np)
assert np.allclose(res_3, self.res_3_np)
# assert np.allclose(res_4, self.res_4_np)
assert np.allclose(res_5, self.res_5_np)
def test_dynamic_graph(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
paddle.disable_static(place=place)
x = paddle.to_variable(self.x_np)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[3, 3, 3])
out_1 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(output_size=5)
out_2 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[2, 3, 5])
out_3 = adaptive_max_pool(x=x)
# adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
# output_size=[3, 3, 3], data_format="NDHWC")
# out_4 = adaptive_max_pool(x=x)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool3d(
output_size=[None, 3, None])
out_5 = adaptive_max_pool(x=x)
assert np.allclose(out_1.numpy(), self.res_1_np)
assert np.allclose(out_2.numpy(), self.res_2_np)
assert np.allclose(out_3.numpy(), self.res_3_np)
# assert np.allclose(out_4.numpy(), self.res_4_np)
assert np.allclose(out_5.numpy(), self.res_5_np)
if __name__ == '__main__':
unittest.main()
...@@ -85,10 +85,35 @@ class TestBatchNorm(unittest.TestCase):
                y = bn(fluid.dygraph.to_variable(x))
                return y.numpy()
def compute_v3(x, is_test, trainable_statistics):
with fluid.dygraph.guard(p):
bn = fluid.dygraph.BatchNorm(
shape[1],
is_test=is_test,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(1.0),
trainable=False),
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.Constant(0.0),
trainable=False),
trainable_statistics=trainable_statistics)
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
def compute_v4(x):
with fluid.dygraph.guard(p):
bn = paddle.nn.BatchNorm2d(
shape[1], weight_attr=False, bias_attr=False)
y = bn(fluid.dygraph.to_variable(x))
return y.numpy()
            x = np.random.randn(*shape).astype("float32")
            y1 = compute_v1(x, False, False)
            y2 = compute_v2(x)
y3 = compute_v3(x, False, False)
y4 = compute_v4(x)
self.assertTrue(np.allclose(y1, y2)) self.assertTrue(np.allclose(y1, y2))
self.assertTrue(np.allclose(y3, y4))
def test_static(self): def test_static(self):
places = [fluid.CPUPlace()] places = [fluid.CPUPlace()]
......
...@@ -166,12 +166,16 @@ class TestClipAPI(unittest.TestCase): ...@@ -166,12 +166,16 @@ class TestClipAPI(unittest.TestCase):
data_shape = [1, 9, 9, 4] data_shape = [1, 9, 9, 4]
data = np.random.random(data_shape).astype('float32') data = np.random.random(data_shape).astype('float32')
images = paddle.to_variable(data, dtype='float32') images = paddle.to_variable(data, dtype='float32')
v_min = paddle.to_variable(np.array([0.2], dtype=np.float32))
v_max = paddle.to_variable(np.array([0.8], dtype=np.float32))
out_1 = paddle.clip(images, min=0.2, max=0.8) out_1 = paddle.clip(images, min=0.2, max=0.8)
out_2 = paddle.clip(images, min=0.2, max=0.9) out_2 = paddle.clip(images, min=0.2, max=0.9)
out_3 = paddle.clip(images, min=v_min, max=v_max)
self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8))) self.assertTrue(np.allclose(out_1.numpy(), data.clip(0.2, 0.8)))
self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9))) self.assertTrue(np.allclose(out_2.numpy(), data.clip(0.2, 0.9)))
self.assertTrue(np.allclose(out_3.numpy(), data.clip(0.2, 0.8)))
def test_errors(self): def test_errors(self):
paddle.enable_static() paddle.enable_static()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
"""
High-level unit tests for distributed fleet.
"""
import os
import sys
import subprocess
import six
import shutil
import numpy as np
import argparse
from contextlib import closing
import socket
import time
import tempfile
import unittest
import paddle
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
__all__ = ['FleetDistHeterRunnerBase', 'TestFleetHeterBase', 'runtime_main']
RUN_STEP = 5
LEARNING_RATE = 0.01
DIST_UT_PORT = 0
class FleetDistHeterRunnerBase(object):
"""
    run_pserver, run_trainer : after the role is initialized, split the program with the transpiler
    net : implemented by child classes; builds the network of the model
    do training : the executor runs the training program
    (a minimal child-class sketch is given after runtime_main below)
"""
def build_role(self, args):
environs = {}
environs["PADDLE_PSERVERS_IP_PORT_LIST"] = args.endpoints
environs["PADDLE_TRAINER_ENDPOINTS"] = args.trainer_endpoints
environs[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"] = args.heter_trainer_endpoints
environs["PADDLE_HETER_TRAINER_DEVICE"] = args.heter_trainer_device
environs["TRAINING_ROLE"] = args.role.upper()
environs["PADDLE_TRAINERS_NUM"] = args.trainers
environs["PADDLE_TRAINER_ID"] = args.current_id
if args.role.upper() == "PSERVER":
environs["POD_IP"] = args.endpoints.split(",")[int(
args.current_id)].split(":")[0]
environs["PADDLE_PORT"] = args.endpoints.split(",")[int(
args.current_id)].split(":")[1]
elif args.role.upper() == "HETER_TRAINER":
environs["POD_IP"] = args.heter_trainer_endpoints.split(",")[int(
args.current_id)].split(":")[0]
environs["PADDLE_PORT"] = args.heter_trainer_endpoints.split(",")[
int(args.current_id)].split(":")[1]
environs["FLAGS_selected_gpus"] = args.current_id
for k, v in environs.items():
os.environ[k] = str(v)
self.role = role_maker.PaddleCloudRoleMaker()
return self.role
def build_strategy(self, args):
self.strategy = paddle.distributed.fleet.DistributedStrategy()
self.strategy.a_sync = True
return self.strategy
def build_optimizer(self, avg_cost, strategy):
optimizer = fluid.optimizer.SGD(LEARNING_RATE)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
def run_pserver(self, args):
fleet.init_server()
fleet.run_server()
def run_dataset_trainer(self, args):
out = self.do_dataset_training(fleet)
def run_pyreader_trainer(self, args):
out = self.do_pyreader_training(fleet)
def net(self, args, batch_size=4, lr=0.01):
raise NotImplementedError(
"get_model should be implemented by child classes.")
def do_dataset_training(self, fleet):
raise NotImplementedError(
"do_dataset_training should be implemented by child classes.")
def do_pyreader_training(self, fleet):
raise NotImplementedError(
"do_pyreader_training should be implemented by child classes.")
class TestFleetHeterBase(unittest.TestCase):
"""
    start_pserver, start_trainer : add the start commands to the test
    run_cluster : use multiple processes to test the distributed program
"""
def _setup_config(self):
raise NotImplementedError("tests should have _setup_config implemented")
def tearDown(self):
t = time.time() - self.startTime
print('%s: %.3f' % (self.__class__.__name__, t))
def setUp(self):
self.startTime = time.time()
self._mode = "async"
self._reader = "pyreader"
self._trainers = 2
self._pservers = 2
self._port_set = set()
self._heter_device = "gpu"
global DIST_UT_PORT
if DIST_UT_PORT == 0 and os.getenv("PADDLE_DIST_UT_PORT"):
DIST_UT_PORT = int(os.getenv("PADDLE_DIST_UT_PORT"))
if DIST_UT_PORT:
print("set begin_port:", DIST_UT_PORT)
self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT, DIST_UT_PORT + 1)
self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT + 2, DIST_UT_PORT + 3)
self._heter_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
DIST_UT_PORT + 4, DIST_UT_PORT + 5)
DIST_UT_PORT += 6
else:
self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._tr_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._heter_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
self._find_free_port(), self._find_free_port())
self._python_interp = sys.executable
self._geo_sgd_need_push_nums = 5
self._grad_clip_mode = 0
self._setup_config()
def _find_free_port(self):
def __free_port():
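            # Bind to port 0 so the OS assigns an unused ephemeral port, then
            # report that port number back via getsockname().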
with closing(socket.socket(socket.AF_INET,
socket.SOCK_STREAM)) as s:
s.bind(('', 0))
return s.getsockname()[1]
while True:
port = __free_port()
if port not in self._port_set:
self._port_set.add(port)
return port
def _start_pserver(self, cmd, required_envs):
ps0_cmd, ps1_cmd = cmd.format(0), cmd.format(1)
ps0_pipe = open(tempfile.gettempdir() + "/ps0_err.log", "wb+")
ps1_pipe = open(tempfile.gettempdir() + "/ps1_err.log", "wb+")
ps0_proc = subprocess.Popen(
ps0_cmd.strip().split(" "),
stdout=subprocess.PIPE,
stderr=ps0_pipe,
env=required_envs)
ps1_proc = subprocess.Popen(
ps1_cmd.strip().split(" "),
stdout=subprocess.PIPE,
stderr=ps1_pipe,
env=required_envs)
return ps0_proc, ps1_proc, ps0_pipe, ps1_pipe
def _start_trainer(self, cmd, required_envs):
tr0_cmd, tr1_cmd = cmd.format(0), cmd.format(1)
tr0_pipe = open(tempfile.gettempdir() + "/tr0_err.log", "wb+")
tr1_pipe = open(tempfile.gettempdir() + "/tr1_err.log", "wb+")
tr0_out = open(tempfile.gettempdir() + "/tr0_out.log", "wb+")
tr1_out = open(tempfile.gettempdir() + "/tr1_out.log", "wb+")
tr0_proc = subprocess.Popen(
tr0_cmd.strip().split(" "),
stdout=tr0_out,
stderr=tr0_pipe,
env=required_envs)
tr1_proc = subprocess.Popen(
tr1_cmd.strip().split(" "),
stdout=tr1_out,
stderr=tr1_pipe,
env=required_envs)
return tr0_proc, tr1_proc, tr0_pipe, tr1_pipe
def _start_heter_trainer(self, cmd, required_envs):
heter0_cmd, heter1_cmd = cmd.format(0), cmd.format(1)
heter0_pipe = open(tempfile.gettempdir() + "/heter0_err.log", "wb+")
heter1_pipe = open(tempfile.gettempdir() + "/heter1_err.log", "wb+")
heter0_out = open(tempfile.gettempdir() + "/heter0_out.log", "wb+")
heter1_out = open(tempfile.gettempdir() + "/heter1_out.log", "wb+")
heter0_proc = subprocess.Popen(
heter0_cmd.strip().split(" "),
stdout=heter0_out,
stderr=heter0_pipe,
env=required_envs)
heter1_proc = subprocess.Popen(
heter1_cmd.strip().split(" "),
stdout=heter1_out,
stderr=heter1_pipe,
env=required_envs)
return heter0_proc, heter1_proc, heter0_pipe, heter1_pipe
def _run_cluster(self, model, envs):
env = {'GRAD_CLIP': str(self._grad_clip_mode)}
python_path = self._python_interp
gloo_path = tempfile.mkdtemp()
if os.getenv('WITH_COVERAGE', 'OFF') == 'ON':
envs['COVERAGE_FILE'] = os.getenv('COVERAGE_FILE', '')
python_path += " -m coverage run --branch -p"
env.update(envs)
tr_cmd = "{0} {1} --role trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
ps_cmd = "{0} {1} --role pserver --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
heter_cmd = "{0} {1} --role heter_trainer --endpoints {2} --trainer_endpoints {3} --current_id {{}} --trainers {4} --mode {5} --geo_sgd_need_push_nums {6} --reader {7} --gloo_path {8} --heter_trainer_endpoints {9} --heter_trainer_device {10}".format(
python_path, model, self._ps_endpoints, self._tr_endpoints,
self._trainers, self._mode, self._geo_sgd_need_push_nums,
self._reader, gloo_path, self._heter_endpoints, self._heter_device)
# Run dist train to compare with local results
ps0, ps1, ps0_pipe, ps1_pipe = self._start_pserver(ps_cmd, env)
tr0, tr1, tr0_pipe, tr1_pipe = self._start_trainer(tr_cmd, env)
heter0, heter1, heter0_pipe, heter1_pipe = self._start_heter_trainer(
heter_cmd, env)
        # Wait until both trainer processes terminate
while True:
stat0 = tr0.poll()
time.sleep(0.1)
if stat0 is not None:
break
while True:
stat1 = tr1.poll()
time.sleep(0.1)
if stat1 is not None:
break
tr0_out, tr0_err = tr0.communicate()
tr1_out, tr1_err = tr1.communicate()
print("tr end communicate")
tr0_ret = tr0.returncode
        tr1_ret = tr1.returncode
print("tr get returncode: {}".format(tr0_ret))
if tr0_ret != 0:
print(
"========================Error tr0_err begin==========================="
)
os.system("cat {}".format(tempfile.gettempdir() + "/tr0_err.log"))
print(
"========================Error tr0_err end==========================="
)
if tr1_ret != 0:
print(
"========================Error tr1_err begin==========================="
)
os.system("cat {}".format(tempfile.gettempdir() + "/tr1_err.log"))
print(
"========================Error tr1_err end==========================="
)
self.assertEqual(tr0_ret, 0, "something wrong in tr0, please check")
self.assertEqual(tr1_ret, 0, "something wrong in tr1, please check")
# close trainer file
tr0_pipe.close()
tr1_pipe.close()
ps0_pipe.close()
ps1_pipe.close()
heter0_pipe.close()
heter1_pipe.close()
ps0.terminate()
ps1.terminate()
heter0.terminate()
heter1.terminate()
shutil.rmtree(gloo_path)
return 0, 0
def check_with_place(self,
model_file,
delta=1e-3,
check_error_log=False,
need_envs={}):
required_envs = {
"PATH": os.getenv("PATH", ""),
"PYTHONPATH": os.getenv("PYTHONPATH", ""),
"LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
"FLAGS_rpc_deadline": "5000", # 5sec to fail fast
"http_proxy": ""
}
required_envs.update(need_envs)
if check_error_log:
required_envs["GLOG_v"] = "3"
required_envs["GLOG_logtostderr"] = "1"
tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)
def runtime_main(test_class):
parser = argparse.ArgumentParser(description='Run Fleet test.')
parser.add_argument(
'--role',
type=str,
required=True,
choices=['pserver', 'trainer', 'heter_trainer'])
parser.add_argument('--endpoints', type=str, required=False, default="")
parser.add_argument(
'--trainer_endpoints', type=str, required=False, default="")
parser.add_argument(
'--heter_trainer_endpoints', type=str, required=False, default="")
parser.add_argument(
'--heter_trainer_device', type=str, required=False, default="gpu")
parser.add_argument('--gloo_path', type=str, required=False, default="")
parser.add_argument('--current_id', type=int, required=False, default=0)
parser.add_argument('--trainers', type=int, required=False, default=1)
parser.add_argument('--mode', type=str, required=False, default='async')
parser.add_argument(
'--geo_sgd_need_push_nums', type=int, required=False, default=2)
parser.add_argument('--reader', type=str, required=False, default='dataset')
args = parser.parse_args()
model = test_class()
role = model.build_role(args)
fleet.init(role)
strategy = model.build_strategy(args)
avg_cost = model.net(args)
model.build_optimizer(avg_cost, strategy)
fleet_util._set_strategy(strategy)
fleet_util._set_role_maker(role)
if args.role == "pserver" or args.role == "heter_trainer":
model.run_pserver(args)
else:
if args.reader == "dataset":
model.run_dataset_trainer(args)
else:
model.run_pyreader_trainer(args)
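# A minimal, hypothetical sketch of a concrete runner (the model file the tests
# actually launch, dist_fleet_heter_ctr.py, is not shown here; the class name
# and layer choices below are illustrative assumptions only). A child class
# implements `net` and the training entry points, then hands itself to
# `runtime_main`:
#
#   class MinimalHeterRunner(FleetDistHeterRunnerBase):
#       def net(self, args, batch_size=4, lr=0.01):
#           x = fluid.layers.data(name="x", shape=[10], dtype="float32")
#           y = fluid.layers.data(name="y", shape=[1], dtype="int64")
#           prediction = fluid.layers.fc(input=x, size=2, act="softmax")
#           cost = fluid.layers.cross_entropy(input=prediction, label=y)
#           self.avg_cost = fluid.layers.mean(cost)
#           return self.avg_cost
#
#       def do_dataset_training(self, fleet):
#           fleet.init_worker()
#           exe = fluid.Executor(fluid.CPUPlace())
#           exe.run(fluid.default_startup_program())
#           # feed a fluid Dataset here and train for RUN_STEP passes
#           fleet.stop_worker()
#
#       def do_pyreader_training(self, fleet):
#           pass  # analogous, feeding batches through a py_reader
#
#   if __name__ == "__main__":
#       runtime_main(MinimalHeterRunner)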
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import unittest
import tempfile
from test_dist_fleet_heter_base import TestFleetHeterBase
class TestDistHeterDatasetAsync2x2(TestFleetHeterBase):
def _setup_config(self):
self._mode = "async"
self._reader = "dataset"
def check_with_place(self,
model_file,
delta=1e-3,
check_error_log=False,
need_envs={}):
required_envs = {
"PATH": os.getenv("PATH", ""),
"PYTHONPATH": os.getenv("PYTHONPATH", ""),
"LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""),
"FLAGS_rpc_deadline": "5000", # 5sec to fail fast
"http_proxy": "",
"CPU_NUM": "1"
}
required_envs.update(need_envs)
if check_error_log:
required_envs["GLOG_v"] = "4"
required_envs["GLOG_logtostderr"] = "1"
tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs)
def test_dist_train(self):
self.check_with_place(
"dist_fleet_heter_ctr.py", delta=1e-5, check_error_log=True)
if __name__ == "__main__":
unittest.main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import paddle
import os
import math
import paddle.fluid as fluid
import paddle.distributed.fleet.base.role_maker as role_maker
from paddle.distributed.fleet.base.util_factory import fleet_util
from paddle.distributed.fleet import fleet
class TestDistFleetHeterProgram(unittest.TestCase):
def build_role(self):
environs = {}
environs[
"PADDLE_PSERVERS_IP_PORT_LIST"] = "127.0.0.1:36012,127.0.0.1:36013"
environs["PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36014,127.0.0.1:36015"
environs[
"PADDLE_HETER_TRAINER_IP_PORT_LIST"] = "127.0.0.1:36016,127.0.0.1:36017"
environs["PADDLE_HETER_TRAINER_DEVICE"] = "gpu"
environs["TRAINING_ROLE"] = "HETER_TRAINER"
environs["PADDLE_TRAINERS_NUM"] = 2
environs["PADDLE_TRAINER_ID"] = 0
environs["POD_IP"] = "127.0.0.1"
environs["PADDLE_PORT"] = "36016"
environs["FLAGS_selected_gpus"] = 0
for k, v in environs.items():
os.environ[k] = str(v)
self.role = role_maker.PaddleCloudRoleMaker()
return self.role
def build_strategy(self):
self.strategy = paddle.distributed.fleet.DistributedStrategy()
self.strategy.a_sync = True
return self.strategy
def build_input(self):
dense_input = fluid.layers.data(
name="dense_input", shape=[10], dtype="float32")
sparse_input_ids = [
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)
]
label = fluid.layers.data(name="label", shape=[1], dtype="float32")
inputs = [dense_input] + sparse_input_ids + [label]
return inputs
def build_net(self, inputs):
def embedding_layer(input):
return fluid.layers.embedding(
input=input,
is_sparse=True,
size=[100001, 10],
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()), )
sparse_embed_seq = list(map(embedding_layer, inputs[1:-1]))
concated = fluid.layers.concat(sparse_embed_seq + inputs[0:1], axis=1)
with fluid.device_guard("gpu"):
fc1 = fluid.layers.fc(
input=concated,
size=400,
act="relu",
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(concated.shape[1]))),
name="fc1")
with fluid.device_guard("cpu"):
fc2 = fluid.layers.fc(input=fc1,
size=400,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))),
name="fc2")
with fluid.device_guard("gpu"):
fc3 = fluid.layers.fc(input=fc2,
size=400,
act="relu",
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))),
name="fc3")
with fluid.device_guard("cpu"):
predict = fluid.layers.fc(
input=fc3,
size=2,
act="softmax",
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))), )
with fluid.device_guard("gpu"):
labels = fluid.layers.cast(inputs[-1], dtype="int64")
cost = fluid.layers.cross_entropy(input=predict, label=labels)
avg_cost = fluid.layers.reduce_sum(cost)
return avg_cost
def build_optimizer(self, avg_cost, strategy):
optimizer = fluid.optimizer.SGD(1e-2)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost)
def test(self):
role = self.build_role()
fleet.init(role)
strategy = self.build_strategy()
inputs = self.build_input()
avg_cost = self.build_net(inputs)
self.build_optimizer(avg_cost, strategy)
if __name__ == "__main__":
unittest.main()
...@@ -102,8 +102,23 @@ class TestExpandAsOpRank4(OpTest): ...@@ -102,8 +102,23 @@ class TestExpandAsOpRank4(OpTest):
self.check_grad(['X'], 'Out') self.check_grad(['X'], 'Out')
# Test dygraph API
class TestExpandAsDygraphAPI(unittest.TestCase):
def test_api(self):
import paddle
paddle.disable_static()
np_data_x = np.array([1, 2, 3]).astype('int32')
np_data_y = np.array([1, 2, 3, 1, 2, 3]).astype('int32')
data_x = paddle.to_tensor(np_data_x)
data_y = paddle.to_tensor(np_data_y)
out = fluid.layers.expand_as(data_x, data_y)
np_out = out.numpy()
assert np.array_equal(np_out, np.tile(np_data_x, (2)))
paddle.enable_static()
# Test python API # Test python API
class TestExpandAPI(unittest.TestCase): class TestExpandAsAPI(unittest.TestCase):
def test_api(self): def test_api(self):
input1 = np.random.random([12, 14]).astype("float32") input1 = np.random.random([12, 14]).astype("float32")
input2 = np.random.random([48, 14]).astype("float32") input2 = np.random.random([48, 14]).astype("float32")
......
...@@ -43,7 +43,7 @@ class TestFleetBase(unittest.TestCase): ...@@ -43,7 +43,7 @@ class TestFleetBase(unittest.TestCase):
role = role_maker.PaddleCloudRoleMaker(is_collective=True) role = role_maker.PaddleCloudRoleMaker(is_collective=True)
fleet.init(role) fleet.init(role)
strategy = fleet.DistributedStrategy() strategy = fleet.DistributedStrategy()
optimizer = paddle.optimizer.SGD(learning_rate=0.001) optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.001)
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
......
...@@ -22,6 +22,7 @@ import paddle.fluid.core as core ...@@ -22,6 +22,7 @@ import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
from paddle.fluid.executor import Executor from paddle.fluid.executor import Executor
from op_test import OpTest from op_test import OpTest
import paddle
class TestGaussianRandomOp(OpTest): class TestGaussianRandomOp(OpTest):
...@@ -235,6 +236,56 @@ class TestGaussianRandomAPI(unittest.TestCase): ...@@ -235,6 +236,56 @@ class TestGaussianRandomAPI(unittest.TestCase):
self.assertAlmostEqual(np.mean(res_6), 0.0, delta=0.1) self.assertAlmostEqual(np.mean(res_6), 0.0, delta=0.1)
self.assertAlmostEqual(np.std(res_6), 1., delta=0.1) self.assertAlmostEqual(np.std(res_6), 1., delta=0.1)
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.gaussian_random([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.gaussian_random([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.gaussian_random([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
class TestStandardNormalDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.standard_normal([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.standard_normal([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.standard_normal([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -658,7 +658,7 @@ class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase): ...@@ -658,7 +658,7 @@ class TestImperativeExponentialMovingAverage(TestImperativeOptimizerBase):
class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list): def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5, optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list) parameters=parameter_list)
optimizer = PipelineOptimizer(optimizer) optimizer = PipelineOptimizer(optimizer)
return optimizer return optimizer
...@@ -670,7 +670,7 @@ class TestImperativePipelineOptimizer(TestImperativeOptimizerBase): ...@@ -670,7 +670,7 @@ class TestImperativePipelineOptimizer(TestImperativeOptimizerBase):
class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list): def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5, optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list) parameters=parameter_list)
optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5) optimizer = LookaheadOptimizer(optimizer, alpha=0.5, k=5)
return optimizer return optimizer
...@@ -682,7 +682,7 @@ class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase): ...@@ -682,7 +682,7 @@ class TestImperativeLookaheadOptimizer(TestImperativeOptimizerBase):
class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase): class TestImperativeRecomputeOptimizer(TestImperativeOptimizerBase):
def get_optimizer_dygraph(self, parameter_list): def get_optimizer_dygraph(self, parameter_list):
optimizer = paddle.optimizer.SGD(learning_rate=0.5, optimizer = paddle.optimizer.SGD(learning_rate=0.5,
parameter_list=parameter_list) parameters=parameter_list)
optimizer = RecomputeOptimizer(optimizer) optimizer = RecomputeOptimizer(optimizer)
return optimizer return optimizer
......
...@@ -299,7 +299,7 @@ class TestLayer(LayerTest): ...@@ -299,7 +299,7 @@ class TestLayer(LayerTest):
my_syncbn = paddle.nn.SyncBatchNorm(3) my_syncbn = paddle.nn.SyncBatchNorm(3)
dy_ret = my_syncbn(base.to_variable(t)) dy_ret = my_syncbn(base.to_variable(t))
dy_ret_value = dy_ret.numpy() dy_ret_value = dy_ret.numpy()
self.assertTrue(np.array_equal(static_ret, static_ret)) self.assertTrue(np.array_equal(static_ret, dy_ret_value))
def test_relu(self): def test_relu(self):
with self.static_graph(): with self.static_graph():
......
...@@ -19,6 +19,8 @@ import numpy as np ...@@ -19,6 +19,8 @@ import numpy as np
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
import paddle
import paddle.fluid as fluid
class TestMomentumOp1(OpTest): class TestMomentumOp1(OpTest):
...@@ -234,5 +236,48 @@ class TestSparseMomentumOp2(TestSparseMomentumOp): ...@@ -234,5 +236,48 @@ class TestSparseMomentumOp2(TestSparseMomentumOp):
self.use_nesterov = True self.use_nesterov = True
class TestMomentumV2(unittest.TestCase):
def test_momentum_dygraph(self):
paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
        # Any optimizer supported by dygraph works here; this test uses Momentum.
        momentum = paddle.optimizer.Momentum(
            learning_rate=0.01, momentum=0.9, parameters=linear.parameters())
        out = linear(a)
        out.backward()
        momentum.step()
        momentum.clear_gradients()
def test_momentum(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
            momentum_optimizer = paddle.optimizer.Momentum(
                learning_rate=0.1, momentum=0.9)
            momentum_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(
ValueError, paddle.optimizer.Momentum, learning_rate=None)
self.assertRaises(ValueError, paddle.optimizer.Momentum, momentum=None)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -174,66 +174,6 @@ class TestPool1d_API(unittest.TestCase): ...@@ -174,66 +174,6 @@ class TestPool1d_API(unittest.TestCase):
result = max_pool1d_dg(input) result = max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np)) self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_max_pool1d(input, output_size=16)
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveMaxPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_avg_dygraph_results(self, place):
with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32")
input = fluid.dygraph.to_variable(input_np)
result = F.adaptive_avg_pool1d(input, output_size=16)
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[0], paddings=[0], adaptive=True)
self.assertTrue(np.allclose(result.numpy(), result_np))
ada_max_pool1d_dg = paddle.nn.layer.AdaptiveAvgPool1d(
output_size=16)
result = ada_max_pool1d_dg(input)
self.assertTrue(np.allclose(result.numpy(), result_np))
def check_adaptive_max_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_max_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = max_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def check_adaptive_avg_static_results(self, place):
with fluid.program_guard(fluid.Program(), fluid.Program()):
input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32")
result = F.adaptive_avg_pool1d(input, output_size=16)
input_np = np.random.random([2, 3, 32]).astype("float32")
result_np = avg_pool1D_forward_naive(
input_np, ksize=[16], strides=[2], paddings=[0], adaptive=True)
exe = fluid.Executor(place)
fetches = exe.run(fluid.default_main_program(),
feed={"input": input_np},
fetch_list=[result])
self.assertTrue(np.allclose(fetches[0], result_np))
def check_max_dygraph_padding_same(self, place): def check_max_dygraph_padding_same(self, place):
with fluid.dygraph.guard(place): with fluid.dygraph.guard(place):
input_np = np.random.random([2, 3, 32]).astype("float32") input_np = np.random.random([2, 3, 32]).astype("float32")
...@@ -265,10 +205,6 @@ class TestPool1d_API(unittest.TestCase): ...@@ -265,10 +205,6 @@ class TestPool1d_API(unittest.TestCase):
self.check_avg_dygraph_results(place) self.check_avg_dygraph_results(place)
self.check_max_static_results(place) self.check_max_static_results(place)
self.check_avg_static_results(place) self.check_avg_static_results(place)
self.check_adaptive_max_dygraph_results(place)
self.check_adaptive_avg_dygraph_results(place)
self.check_adaptive_max_static_results(place)
self.check_adaptive_avg_static_results(place)
self.check_max_dygraph_padding_same(place) self.check_max_dygraph_padding_same(place)
self.check_avg_dygraph_padding_same(place) self.check_avg_dygraph_padding_same(place)
......
...@@ -21,6 +21,7 @@ import paddle.fluid.core as core ...@@ -21,6 +21,7 @@ import paddle.fluid.core as core
from paddle import rand from paddle import rand
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid import compiler, Program, program_guard from paddle.fluid import compiler, Program, program_guard
import paddle
class TestRandOpError(unittest.TestCase): class TestRandOpError(unittest.TestCase):
...@@ -115,5 +116,31 @@ class TestRandOpForDygraph(unittest.TestCase): ...@@ -115,5 +116,31 @@ class TestRandOpForDygraph(unittest.TestCase):
self.run_net(True) self.run_net(True)
class TestRandDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.rand([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.rand([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.rand([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -20,6 +20,7 @@ import paddle.fluid as fluid ...@@ -20,6 +20,7 @@ import paddle.fluid as fluid
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.fluid.op import Operator from paddle.fluid.op import Operator
from op_test import OpTest from op_test import OpTest
import paddle
class TestSGDOp(OpTest): class TestSGDOp(OpTest):
...@@ -208,5 +209,46 @@ class TestSGDOpWithLargeInput(unittest.TestCase): ...@@ -208,5 +209,46 @@ class TestSGDOpWithLargeInput(unittest.TestCase):
result = exe.run(compiled_prog, fetch_list=[avg_cost]) result = exe.run(compiled_prog, fetch_list=[avg_cost])
class TestSGDV2(unittest.TestCase):
def test_sgd_dygraph(self):
paddle.disable_static()
value = np.arange(26).reshape(2, 13).astype("float32")
a = paddle.to_tensor(value)
linear = paddle.nn.Linear(13, 5)
        # Any optimizer supported by dygraph works here; this test uses SGD.
        sgd = paddle.optimizer.SGD(learning_rate=0.01,
                                   parameters=linear.parameters(),
                                   weight_decay=0.01)
        out = linear(a)
        out.backward()
        sgd.step()
        sgd.clear_gradients()
def test_sgd(self):
place = fluid.CPUPlace()
main = fluid.Program()
with fluid.program_guard(main):
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y = fluid.layers.data(name='y', shape=[1], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)
            sgd_optimizer = paddle.optimizer.SGD(learning_rate=0.1)
            sgd_optimizer.minimize(avg_cost)
fetch_list = [avg_cost]
train_reader = paddle.batch(
paddle.dataset.uci_housing.train(), batch_size=1)
feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
for data in train_reader():
exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)
def test_raise_error(self):
self.assertRaises(ValueError, paddle.optimizer.SGD, learning_rate=None)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -221,5 +221,21 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase): ...@@ -221,5 +221,21 @@ class TestDygraphSyncBatchNormAPIError(unittest.TestCase):
self.assertRaises(TypeError, my_sync_batch_norm, x2) self.assertRaises(TypeError, my_sync_batch_norm, x2)
class TestConvertSyncBatchNorm(unittest.TestCase):
def test_convert(self):
if not core.is_compiled_with_cuda():
return
with program_guard(Program(), Program()):
model = paddle.nn.Sequential(
paddle.nn.Conv2d(3, 5, 3), paddle.nn.BatchNorm2d(5))
sync_model = paddle.nn.SyncBatchNorm.convert_sync_batchnorm(model)
for idx, sublayer in enumerate(model.sublayers()):
if isinstance(sublayer, paddle.nn.BatchNorm2d):
self.assertEqual(
isinstance(sync_model[idx], paddle.nn.SyncBatchNorm),
True)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -536,5 +536,31 @@ class TestUniformDygraphMode(unittest.TestCase): ...@@ -536,5 +536,31 @@ class TestUniformDygraphMode(unittest.TestCase):
self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0)) self.assertTrue((x_np[i] > 0 and x_np[i] < 1.0))
class TestUniformDtype(unittest.TestCase):
def test_default_dtype(self):
paddle.disable_static()
def test_default_fp_16():
paddle.framework.set_default_dtype('float16')
paddle.tensor.random.uniform([2, 3])
self.assertRaises(TypeError, test_default_fp_16)
def test_default_fp_32():
paddle.framework.set_default_dtype('float32')
out = paddle.tensor.random.uniform([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32)
def test_default_fp_64():
paddle.framework.set_default_dtype('float64')
out = paddle.tensor.random.uniform([2, 3])
self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64)
test_default_fp_64()
test_default_fp_32()
paddle.enable_static()
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -891,10 +891,11 @@ class Model(object): ...@@ -891,10 +891,11 @@ class Model(object):
class Mnist(paddle.nn.Layer): class Mnist(paddle.nn.Layer):
def __init__(self): def __init__(self):
super(MyNet, self).__init__() super(Mnist, self).__init__()
self._fc = Linear(784, 1, act='softmax') self._fc = Linear(784, 10, act='softmax')
@paddle.jit.to_static # If save for inference in dygraph, need this # If save for inference in dygraph, need this
@paddle.jit.to_static
def forward(self, x): def forward(self, x):
y = self._fc(x) y = self._fc(x)
return y return y
...@@ -903,21 +904,18 @@ class Model(object): ...@@ -903,21 +904,18 @@ class Model(object):
device = hapi.set_device('cpu') device = hapi.set_device('cpu')
# if use static graph, do not set # if use static graph, do not set
paddle.disable_static(device) if dynamic else None paddle.disable_static(device) if dynamic else None
# inputs and labels are not required for dynamic graph. # inputs and labels are not required for dynamic graph.
input = hapi.Input([None, 784], 'float32', 'x') input = hapi.Input([None, 784], 'float32', 'x')
label = hapi.Input([None, 1], 'int64', 'label') label = hapi.Input([None, 1], 'int64', 'label')
model = hapi.Model(Mnist(), input, label) model = hapi.Model(Mnist(), input, label)
optim = paddle.optimizer.SGD(learning_rate=1e-3, optim = paddle.optimizer.SGD(learning_rate=1e-3,
parameter_list=model.parameters()) parameter_list=model.parameters())
model.prepare(optim, model.prepare(optim, paddle.nn.CrossEntropyLoss())
paddle.nn.CrossEntropyLoss(),
hapi.metrics.Accuracy())
mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False) mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
model.fit(mnist_data, epochs=1, batch_size=32, verbose=0) model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
model.save('checkpoint/test') # save for training model.save('checkpoint/test') # save for training
model.save('inference_model', False) # save for inference model.save('inference_model', False) # save for inference
""" """
if ParallelEnv().local_rank == 0: if ParallelEnv().local_rank == 0:
...@@ -1534,47 +1532,6 @@ class Model(object): ...@@ -1534,47 +1532,6 @@ class Model(object):
Returns: Returns:
list: The fetch variables' name list list: The fetch variables' name list
Examples:
.. code-block:: python
import numpy as np
import paddle
from paddle.static import InputSpec
import paddle.incubate.hapi as hapi
from paddle.nn import Linear
from paddle.incubate.hapi.datasets.mnist import MNIST as MnistDataset
class Mnist(Layer):
def __init__(self, classifier_act=None):
super(Mnist, self).__init__()
self.fc = Linear(input_dim=784, output_dim=10, act="softmax")
@paddle.jit.to_static # In static mode, you need to delete this.
def forward(self, inputs):
outputs = self.fc(inputs)
return outputs
dynamic = True # False
device = hapi.set_device('gpu')
# if use static graph, do not set
paddle.disable_static(device) if dynamic else None
# inputs and labels are not required for dynamic graph.
input = InputSpec([None, 784], 'float32', 'x')
label = InputSpec([None, 1], 'int64', 'label')
model = hapi.Model(Mnist(), input, label)
optim = paddle.optimizer.SGD(learning_rate=1e-3,
parameter_list=model.parameters())
model.prepare(optim,
paddle.nn.CrossEntropyLoss(),
hapi.metrics.Accuracy())
mnist_data = hapi.datasets.MNIST(mode='train', chw_format=False)
model.fit(mnist_data, epochs=1, batch_size=32, verbose=0)
model.save_inference_model('inference_model')
""" """
def get_inout_spec(all_vars, return_name=False): def get_inout_spec(all_vars, return_name=False):
...@@ -1592,8 +1549,8 @@ class Model(object): ...@@ -1592,8 +1549,8 @@ class Model(object):
# the inputs of the model in running. # the inputs of the model in running.
# 3. Make it Unnecessary to add `@paddle.jit.to_static` for users in dynamic mode. # 3. Make it Unnecessary to add `@paddle.jit.to_static` for users in dynamic mode.
if fluid.in_dygraph_mode(): if fluid.in_dygraph_mode():
with fluid.framework._dygraph_guard(None):
layer = self.network layer = self.network
fluid.disable_dygraph()
# 1. input check # 1. input check
prog_translator = ProgramTranslator() prog_translator = ProgramTranslator()
...@@ -1631,7 +1588,8 @@ class Model(object): ...@@ -1631,7 +1588,8 @@ class Model(object):
if param_or_buffer.name in state_names_dict: if param_or_buffer.name in state_names_dict:
extra_info_dict['structured_name'] = state_names_dict[ extra_info_dict['structured_name'] = state_names_dict[
param_or_buffer.name] param_or_buffer.name]
extra_info_dict['stop_gradient'] = param_or_buffer.stop_gradient extra_info_dict[
'stop_gradient'] = param_or_buffer.stop_gradient
if isinstance(param_or_buffer, ParamBase): if isinstance(param_or_buffer, ParamBase):
extra_info_dict['trainable'] = param_or_buffer.trainable extra_info_dict['trainable'] = param_or_buffer.trainable
extra_var_info[param_or_buffer.name] = extra_info_dict extra_var_info[param_or_buffer.name] = extra_info_dict
......
...@@ -64,6 +64,11 @@ class TestTransforms(unittest.TestCase): ...@@ -64,6 +64,11 @@ class TestTransforms(unittest.TestCase):
self.do_transform(trans) self.do_transform(trans)
def test_normalize(self):
normalize = transforms.Normalize(mean=0.5, std=0.5)
trans = transforms.Compose([transforms.Permute(mode='CHW'), normalize])
self.do_transform(trans)
def test_trans_resize(self): def test_trans_resize(self):
trans = transforms.Compose([ trans = transforms.Compose([
transforms.Resize(300, [0, 1]), transforms.Resize(300, [0, 1]),
...@@ -165,7 +170,7 @@ class TestTransforms(unittest.TestCase): ...@@ -165,7 +170,7 @@ class TestTransforms(unittest.TestCase):
fake_img = np.random.rand(500, 400, 3).astype('float32') fake_img = np.random.rand(500, 400, 3).astype('float32')
fake_img_gray = trans_gray(fake_img) fake_img_gray = trans_gray(fake_img)
np.testing.assert_equal(len(fake_img_gray.shape), 2) np.testing.assert_equal(len(fake_img_gray.shape), 3)
np.testing.assert_equal(fake_img_gray.shape[0], 500) np.testing.assert_equal(fake_img_gray.shape[0], 500)
np.testing.assert_equal(fake_img_gray.shape[1], 400) np.testing.assert_equal(fake_img_gray.shape[1], 400)
......
...@@ -16,6 +16,7 @@ import sys ...@@ -16,6 +16,7 @@ import sys
import collections import collections
import random import random
import math import math
import functools
import cv2 import cv2
import numbers import numbers
...@@ -31,6 +32,23 @@ else: ...@@ -31,6 +32,23 @@ else:
__all__ = ['flip', 'resize', 'pad', 'rotate', 'to_grayscale'] __all__ = ['flip', 'resize', 'pad', 'rotate', 'to_grayscale']
def keepdims(func):
"""Keep the dimension of input images unchanged"""
@functools.wraps(func)
def wrapper(image, *args, **kwargs):
if len(image.shape) != 3:
raise ValueError("Expect image have 3 dims, but got {} dims".format(
len(image.shape)))
ret = func(image, *args, **kwargs)
if len(ret.shape) == 2:
ret = ret[:, :, np.newaxis]
return ret
return wrapper
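# Illustrative example (an assumption added for clarity, not part of the
# module): a wrapped function that returns a 2-D grayscale array gets its
# channel axis restored, so callers always receive HWC-shaped images:
#
#   @keepdims
#   def _mean_channel(img):
#       return img.mean(axis=2)                # drops the channel axis
#
#   _mean_channel(np.zeros((4, 4, 3))).shape   # -> (4, 4, 1)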
@keepdims
def flip(image, code): def flip(image, code):
""" """
According to the code (the type of flip), flip the input image According to the code (the type of flip), flip the input image
...@@ -62,6 +80,7 @@ def flip(image, code): ...@@ -62,6 +80,7 @@ def flip(image, code):
return cv2.flip(image, flipCode=code) return cv2.flip(image, flipCode=code)
@keepdims
def resize(img, size, interpolation=cv2.INTER_LINEAR): def resize(img, size, interpolation=cv2.INTER_LINEAR):
""" """
Resize the input data to the given size Resize the input data to the given size
...@@ -103,6 +122,7 @@ def resize(img, size, interpolation=cv2.INTER_LINEAR): ...@@ -103,6 +122,7 @@ def resize(img, size, interpolation=cv2.INTER_LINEAR):
return cv2.resize(img, size[::-1], interpolation=interpolation) return cv2.resize(img, size[::-1], interpolation=interpolation)
@keepdims
def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'): def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'):
"""Pads the given CV Image on all sides with speficified padding mode and fill value. """Pads the given CV Image on all sides with speficified padding mode and fill value.
...@@ -193,6 +213,7 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'): ...@@ -193,6 +213,7 @@ def pad(img, padding, fill=(0, 0, 0), padding_mode='constant'):
return img return img
@keepdims
def rotate(img, def rotate(img,
angle, angle,
interpolation=cv2.INTER_LINEAR, interpolation=cv2.INTER_LINEAR,
...@@ -266,6 +287,7 @@ def rotate(img, ...@@ -266,6 +287,7 @@ def rotate(img,
return dst.astype(dtype) return dst.astype(dtype)
@keepdims
def to_grayscale(img, num_output_channels=1): def to_grayscale(img, num_output_channels=1):
"""Converts image to grayscale version of image. """Converts image to grayscale version of image.
......
...@@ -505,7 +505,7 @@ class Normalize(object): ...@@ -505,7 +505,7 @@ class Normalize(object):
mean = [mean, mean, mean] mean = [mean, mean, mean]
if isinstance(std, numbers.Number): if isinstance(std, numbers.Number):
mean = [std, std, std] std = [std, std, std]
self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1) self.mean = np.array(mean, dtype=np.float32).reshape(len(mean), 1, 1)
self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1) self.std = np.array(std, dtype=np.float32).reshape(len(std), 1, 1)
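        # Note: before this fix a scalar `std` overwrote `mean` and stayed a
        # float, so the `len(std)` reshape above raised a TypeError; the
        # corrected line expands `std` into a 3-element list as intended.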
......
...@@ -97,8 +97,20 @@ from .layer.common import Dropout #DEFINE_ALIAS ...@@ -97,8 +97,20 @@ from .layer.common import Dropout #DEFINE_ALIAS
from .layer.common import Dropout2D #DEFINE_ALIAS from .layer.common import Dropout2D #DEFINE_ALIAS
from .layer.common import Dropout3D #DEFINE_ALIAS from .layer.common import Dropout3D #DEFINE_ALIAS
from .layer.common import AlphaDropout #DEFINE_ALIAS from .layer.common import AlphaDropout #DEFINE_ALIAS
from .layer.pooling import AvgPool1d #DEFINE_ALIAS
from .layer.pooling import AvgPool2d #DEFINE_ALIAS
from .layer.pooling import AvgPool3d #DEFINE_ALIAS
from .layer.pooling import MaxPool1d #DEFINE_ALIAS
from .layer.pooling import MaxPool2d #DEFINE_ALIAS
from .layer.pooling import MaxPool3d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS from .layer.pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool2d #DEFINE_ALIAS
from .layer.pooling import AdaptiveMaxPool3d #DEFINE_ALIAS
from .layer.conv import Conv1d #DEFINE_ALIAS from .layer.conv import Conv1d #DEFINE_ALIAS
from .layer.conv import Conv2d #DEFINE_ALIAS from .layer.conv import Conv2d #DEFINE_ALIAS
from .layer.conv import Conv3d #DEFINE_ALIAS from .layer.conv import Conv3d #DEFINE_ALIAS
......
...@@ -170,22 +170,28 @@ from .norm import layer_norm #DEFINE_ALIAS ...@@ -170,22 +170,28 @@ from .norm import layer_norm #DEFINE_ALIAS
from .norm import lrn #DEFINE_ALIAS from .norm import lrn #DEFINE_ALIAS
from .norm import normalize #DEFINE_ALIAS from .norm import normalize #DEFINE_ALIAS
# from .norm import spectral_norm #DEFINE_ALIAS # from .norm import spectral_norm #DEFINE_ALIAS
from .pooling import max_pool1d #DEFINE_ALIAS
from .pooling import avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_max_pool1d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS
from .pooling import pool2d #DEFINE_ALIAS from .pooling import pool2d #DEFINE_ALIAS
from .pooling import pool3d #DEFINE_ALIAS from .pooling import pool3d #DEFINE_ALIAS
from .pooling import avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_pool2d #DEFINE_ALIAS from .pooling import adaptive_pool2d #DEFINE_ALIAS
from .pooling import adaptive_pool3d #DEFINE_ALIAS from .pooling import adaptive_pool3d #DEFINE_ALIAS
from .rnn import rnn #DEFINE_ALIAS
from .rnn import birnn #DEFINE_ALIAS
from .pooling import avg_pool2d #DEFINE_ALIAS from .pooling import avg_pool2d #DEFINE_ALIAS
from .pooling import max_pool2d #DEFINE_ALIAS
from .pooling import avg_pool3d #DEFINE_ALIAS from .pooling import avg_pool3d #DEFINE_ALIAS
from .pooling import max_pool1d #DEFINE_ALIAS
from .pooling import max_pool2d #DEFINE_ALIAS
from .pooling import max_pool3d #DEFINE_ALIAS from .pooling import max_pool3d #DEFINE_ALIAS
from .pooling import adaptive_pool2d #DEFINE_ALIAS
from .pooling import adaptive_pool3d #DEFINE_ALIAS
from .pooling import adaptive_max_pool1d #DEFINE_ALIAS
from .pooling import adaptive_max_pool2d #DEFINE_ALIAS
from .pooling import adaptive_max_pool3d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool1d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS from .pooling import adaptive_avg_pool2d #DEFINE_ALIAS
from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS from .pooling import adaptive_avg_pool3d #DEFINE_ALIAS
from .rnn import rnn #DEFINE_ALIAS
from .rnn import birnn #DEFINE_ALIAS
# from .rnn import gru_unit #DEFINE_ALIAS # from .rnn import gru_unit #DEFINE_ALIAS
# from .rnn import lstm #DEFINE_ALIAS # from .rnn import lstm #DEFINE_ALIAS
# from .rnn import lstm_unit #DEFINE_ALIAS # from .rnn import lstm_unit #DEFINE_ALIAS
......
...@@ -158,7 +158,7 @@ def conv1d(x, ...@@ -158,7 +158,7 @@ def conv1d(x,
bias (Tensor, optional): The bias with shape [M,]. Default: None. bias (Tensor, optional): The bias with shape [M,]. Default: None.
stride (int or tuple, optional): The stride size. If stride is a tuple, it must stride (int or tuple, optional): The stride size. If stride is a tuple, it must
contain one integers, (stride_size). Default: 1. contain one integers, (stride_size). Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same']. 1. a string in ['valid', 'same'].
2. an int, which means the feature map is zero paded by size of `padding` on both sides. 2. an int, which means the feature map is zero paded by size of `padding` on both sides.
3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides. 3. a list[int] or tuple[int] whose length is 1, which means the feature map is zero paded by size of `padding[0]` on both sides.
...@@ -185,7 +185,7 @@ def conv1d(x, ...@@ -185,7 +185,7 @@ def conv1d(x,
same with input. same with input.
Raises: Raises:
ValueError: If the channel dimmention of the input is less than or equal to zero. ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `data_format` is not "NCL" or "NLC". ValueError: If `data_format` is not "NCL" or "NLC".
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
...@@ -238,7 +238,7 @@ def conv1d(x, ...@@ -238,7 +238,7 @@ def conv1d(x,
num_channels = x.shape[channel_dim] num_channels = x.shape[channel_dim]
num_filters = weight.shape[0] num_filters = weight.shape[0]
if num_channels < 0: if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) " raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format( "should be defined. Received: {}.".format(
x.shape, num_channels)) x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
...@@ -260,7 +260,7 @@ def conv1d(x, ...@@ -260,7 +260,7 @@ def conv1d(x,
padding = padding + [0] padding = padding + [0]
else: else:
raise ValueError( raise ValueError(
"The size of padding's dimmention should 1 or 2. But got padding={}". "The size of padding's dimension should be 1 or 2. But got padding={}".
format(padding)) format(padding))
stride = utils.convert_to_list(stride, 1, 'stride') + [1] stride = utils.convert_to_list(stride, 1, 'stride') + [1]
...@@ -424,7 +424,7 @@ def conv2d(x, ...@@ -424,7 +424,7 @@ def conv2d(x,
Raises: Raises:
ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `data_format` is not "NCHW" or "NHWC".
ValueError: If the channel dimmention of the input is less than or equal to zero. ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0. or the element corresponding to the input's channel is not 0.
...@@ -465,7 +465,7 @@ def conv2d(x, ...@@ -465,7 +465,7 @@ def conv2d(x,
num_channels = x.shape[channel_dim] num_channels = x.shape[channel_dim]
num_filters = weight.shape[0] num_filters = weight.shape[0]
if num_channels < 0: if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) " raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format( "should be defined. Received: {}.".format(
x.shape, num_channels)) x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
...@@ -710,7 +710,7 @@ def conv_transpose1d(x, ...@@ -710,7 +710,7 @@ def conv_transpose1d(x,
num_channels = x.shape[channel_dim] num_channels = x.shape[channel_dim]
if num_channels < 0: if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) " raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format( "should be defined. Received: {}.".format(
x.shape, num_channels)) x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
...@@ -728,7 +728,7 @@ def conv_transpose1d(x, ...@@ -728,7 +728,7 @@ def conv_transpose1d(x,
padding = padding + [0] padding = padding + [0]
else: else:
raise ValueError( raise ValueError(
"The size of padding's dimmention should 1 or 2. But got padding={}". "The size of padding's dimension should 1 or 2. But got padding={}".
format(padding)) format(padding))
stride = utils.convert_to_list(stride, 1, 'stride') + [1] stride = utils.convert_to_list(stride, 1, 'stride') + [1]
...@@ -807,10 +807,10 @@ def conv_transpose2d(x, ...@@ -807,10 +807,10 @@ def conv_transpose2d(x,
stride=1, stride=1,
padding=0, padding=0,
output_padding=0, output_padding=0,
groups=1,
dilation=1, dilation=1,
data_format='NCHW', groups=1,
output_size=None, output_size=None,
data_format='NCHW',
name=None): name=None):
""" """
...@@ -883,28 +883,27 @@ def conv_transpose2d(x, ...@@ -883,28 +883,27 @@ def conv_transpose2d(x,
stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution. stride(int|list|tuple, optional): The stride size. It means the stride in transposed convolution.
If stride is a tuple, it must contain two integers, (stride_height, stride_width). If stride is a tuple, it must contain two integers, (stride_height, stride_width).
Otherwise, stride_height = stride_width = stride. Default: stride = 1. Otherwise, stride_height = stride_width = stride. Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively adds padding(str|int|list|tuple, optional): The padding size. It means the number of zero-paddings
`dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a on both sides for each dimension. If `padding` is a string, either 'VALID' or
string, either 'VALID' or 'SAME' supported, which is the padding algorithm. 'SAME' which is the padding algorithm. If padding size is a tuple or list,
If `padding` is a tuple or list, it could be in three forms: it could be in three forms: `[pad_height, pad_width]` or
`[pad_height, pad_width]` or `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and and when `data_format` is `"NCHW"`, `padding` can be in the form
when `data_format` is `'NCHW'`, `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
`padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. when `data_format` is `"NHWC"`, `padding` can be in the form
when `data_format` is `'NHWC'`, `padding` can be in the form
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0. Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0. of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by groups(int, optional): The groups number of the Conv2d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: groups = 1. Default: groups = 1.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain two integers, (dilation_height, dilation_width).
Otherwise, dilation_height = dilation_width = dilation. Default: dilation = 1.
output_size(int|tuple|list, optional): The output image size. If output size is a output_size(int|tuple|list, optional): The output image size. If output size is a
tuple, it must contain two integers, (image_height, image_width). None if use tuple, it must contain two integers, (image_height, image_width). None if use
filter_size, padding, and stride to calculate output_size. filter_size, padding, and stride to calculate output_size.
...@@ -950,7 +949,7 @@ def conv_transpose2d(x, ...@@ -950,7 +949,7 @@ def conv_transpose2d(x,
paddle.disable_static() paddle.disable_static()
x_var = paddle.to_tensor(x) x_var = paddle.to_tensor(x)
w_var = paddle.to_tensor(w) w_var = paddle.to_tensor(w)
y_var = F.conv2d_transpose(x_var, w_var) y_var = F.conv_transpose2d(x_var, w_var)
y_np = y_var.numpy() y_np = y_var.numpy()
print(y_np.shape) print(y_np.shape)
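The shape printed by the example above follows the usual transposed-convolution size arithmetic that output_size and output_padding are balancing. A minimal plain-Python sketch of that formula (an illustration only; the helper name is hypothetical and not part of this module):

# Standard transposed-convolution output size:
# out = (in - 1) * stride - 2 * padding + dilation * (kernel - 1) + output_padding + 1
def transposed_out_dim(in_dim, kernel, stride=1, padding=0, output_padding=0, dilation=1):
    return (in_dim - 1) * stride - 2 * padding + dilation * (kernel - 1) + output_padding + 1

# e.g. an 8x8 input with a 3x3 kernel and the defaults above gives a 10x10 output
print(transposed_out_dim(8, 3))  # 10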
...@@ -966,7 +965,7 @@ def conv_transpose2d(x, ...@@ -966,7 +965,7 @@ def conv_transpose2d(x,
channel_dim = -1 if channel_last else 1 channel_dim = -1 if channel_last else 1
num_channels = x.shape[channel_dim] num_channels = x.shape[channel_dim]
if num_channels < 0: if num_channels < 0:
raise ValueError("The channel dimmention of the input({}) " raise ValueError("The channel dimension of the input({}) "
"should be defined. Received: {}.".format( "should be defined. Received: {}.".format(
x.shape, num_channels)) x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
...@@ -1147,7 +1146,7 @@ def conv3d(x, ...@@ -1147,7 +1146,7 @@ def conv3d(x,
Raises: Raises:
ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `data_format` is not "NCDHW" or "NDHWC".
ValueError: If the channel dimmention of the input is less than or equal to zero. ValueError: If the channel dimension of the input is less than or equal to zero.
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0 ValueError: If `padding` is a tuple, but the element corresponding to the input's batch size is not 0
or the element corresponding to the input's channel is not 0. or the element corresponding to the input's channel is not 0.
...@@ -1160,19 +1159,17 @@ def conv3d(x, ...@@ -1160,19 +1159,17 @@ def conv3d(x,
Examples: Examples:
.. code-block:: python .. code-block:: python
from paddle import fluid
import paddle.nn.functional as F
import paddle.fluid.dygraph as dg
import numpy as np import numpy as np
import paddle
import paddle.nn.functional as F
x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32) x = np.random.randn(2, 3, 8, 8, 8).astype(np.float32)
w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32) w = np.random.randn(6, 3, 3, 3, 3).astype(np.float32)
place = fluid.CPUPlace() paddle.disable_static()
with dg.guard(place): x_var = paddle.to_tensor(x)
x_var = dg.to_variable(x) w_var = paddle.to_tensor(w)
w_var = dg.to_variable(w) y_var = F.conv3d(x_var, w_var)
y_var = F.conv3d(x_var, w_var, act="relu")
y_np = y_var.numpy() y_np = y_var.numpy()
print(y_np.shape) print(y_np.shape)
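The shape printed by the updated example can be checked by hand with the standard convolution size formula. The small sketch below is an illustration only (the helper name is hypothetical), using the shapes from the example (x: [2, 3, 8, 8, 8], w: [6, 3, 3, 3, 3], default stride, padding and dilation):

# out = floor((in + 2 * padding - dilation * (kernel - 1) - 1) / stride) + 1
def conv_out_dim(in_dim, kernel, stride=1, padding=0, dilation=1):
    return (in_dim + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1

batch, out_channels = 2, 6
spatial = [conv_out_dim(8, 3) for _ in range(3)]
print([batch, out_channels] + spatial)  # [2, 6, 6, 6, 6]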
...@@ -1190,7 +1187,7 @@ def conv3d(x, ...@@ -1190,7 +1187,7 @@ def conv3d(x,
num_filters = weight.shape[0] num_filters = weight.shape[0]
if num_channels < 0: if num_channels < 0:
raise ValueError( raise ValueError(
"The channel dimmention of the input({}) should be defined. " "The channel dimension of the input({}) should be defined. "
"Received: {}.".format(x.shape, num_channels)) "Received: {}.".format(x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
raise ValueError( raise ValueError(
...@@ -1260,8 +1257,8 @@ def conv_transpose3d(x, ...@@ -1260,8 +1257,8 @@ def conv_transpose3d(x,
output_padding=0, output_padding=0,
groups=1, groups=1,
dilation=1, dilation=1,
data_format='NCDHW',
output_size=None, output_size=None,
data_format='NCDHW',
name=None): name=None):
""" """
The convolution3d transpose layer calculates the output based on the input, The convolution3d transpose layer calculates the output based on the input,
...@@ -1338,37 +1335,37 @@ def conv_transpose3d(x, ...@@ -1338,37 +1335,37 @@ def conv_transpose3d(x,
If stride is a tuple, it must contain three integers, (stride_depth, stride_height, If stride is a tuple, it must contain three integers, (stride_depth, stride_height,
stride_width). Otherwise, stride_depth = stride_height = stride_width = stride. stride_width). Otherwise, stride_depth = stride_height = stride_width = stride.
Default: stride = 1. Default: stride = 1.
padding(int|list|str|tuple, optional): The padding size. The padding argument effectively padding (string|int|list|tuple, optional): The padding size. It means the number of zero-paddings
adds `dilation * (kernel - 1)` amount of zero-padding on both sides of input. If `padding` is a string, on both sides for each dimension. If `padding` is a string, either 'VALID' or
either 'VALID' or 'SAME' supported, which is the padding algorithm. If `padding` 'SAME' which is the padding algorithm. If padding size is a tuple or list,
is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `'NCDHW'`, `padding` can be in the form and when `data_format` is `"NCDHW"`, `padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `'NDHWC'`, `padding` can be in the form when `data_format` is `"NDHWC"`, `padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
Default: padding = 0. Default: padding = 0.
output_padding(int|list|tuple, optional): Additional size added to one side output_padding(int|list|tuple, optional): Additional size added to one side
of each dimension in the output shape. Default: 0. of each dimension in the output shape. Default: 0.
dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
Default: dilation = 1.
groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by groups(int, optional): The groups number of the Conv3d transpose layer. Inspired by
grouped convolution in Alex Krizhevsky's Deep CNN paper, in which grouped convolution in Alex Krizhevsky's Deep CNN paper, in which
when group=2, the first half of the filters is only connected to the when group=2, the first half of the filters is only connected to the
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: groups=1 Default: groups=1
data_format (str, optional): Specify the data format of the input, and the data format of the output dilation(int|list|tuple, optional): The dilation size. It means the spacing between the kernel points.
will be consistent with that of the input. An optional string from: `"NCHW"`, `"NHWC"`. If dilation is a tuple, it must contain three integers, (dilation_depth, dilation_height,
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: dilation_width). Otherwise, dilation_depth = dilation_height = dilation_width = dilation.
`[batch_size, input_channels, input_height, input_width]`. Default: dilation = 1.
output_size(int|list|tuple, optional): The output image size. If output size is a output_size(int|list|tuple, optional): The output image size. If output size is a
tuple, it must contain three integers, (image_depth, image_height, image_width). This tuple, it must contain three integers, (image_depth, image_height, image_width). This
parameter only works when filter_size is None. If output_size and filter_size are parameter only works when filter_size is None. If output_size and filter_size are
specified at the same time, They should follow the formula above. Default: None. specified at the same time, They should follow the formula above. Default: None.
Output_size and filter_size should not be None at the same time. Output_size and filter_size should not be None at the same time.
data_format (str, optional): Specify the data format of the input, and the data format of the output
will be consistent with that of the input. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
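The padding forms listed above all normalize to one explicit pad value per side of each spatial dimension. The short sketch below illustrates that reduction for the `'NCDHW'` layout; the helper is hypothetical and only mirrors the documented behaviour, not the actual implementation:

# Illustration of the documented padding forms for conv_transpose3d (NCDHW layout).
def normalize_pad3d(padding):
    if isinstance(padding, int):
        return [padding] * 6
    padding = list(padding)
    if len(padding) == 3:      # [pad_depth, pad_height, pad_width]
        return [p for p in padding for _ in range(2)]
    if len(padding) == 5:      # [[0,0], [0,0], [front, back], [top, bottom], [left, right]]
        return [v for pair in padding[2:] for v in pair]
    return padding             # already the 6-element explicit form

print(normalize_pad3d(1))                                         # [1, 1, 1, 1, 1, 1]
print(normalize_pad3d([1, 2, 3]))                                 # [1, 1, 2, 2, 3, 3]
print(normalize_pad3d([[0, 0], [0, 0], [1, 1], [2, 2], [3, 3]]))  # [1, 1, 2, 2, 3, 3]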
...@@ -1425,7 +1422,7 @@ def conv_transpose3d(x, ...@@ -1425,7 +1422,7 @@ def conv_transpose3d(x,
num_filters = weight.shape[1] num_filters = weight.shape[1]
if num_channels < 0: if num_channels < 0:
raise ValueError( raise ValueError(
"The channel dimmention of the input({}) should be defined. " "The channel dimension of the input({}) should be defined. "
"Received: {}.".format(x.shape, num_channels)) "Received: {}.".format(x.shape, num_channels))
if num_channels % groups != 0: if num_channels % groups != 0:
raise ValueError( raise ValueError(
......
...@@ -784,30 +784,30 @@ def kl_div(input, label, reduction='mean', name=None): ...@@ -784,30 +784,30 @@ def kl_div(input, label, reduction='mean', name=None):
import numpy as np import numpy as np
import paddle.nn.functional as F import paddle.nn.functional as F
import paddle
paddle.enable_imperative() paddle.disable_static()
shape = (5, 20) shape = (5, 20)
input = np.random.uniform(-10, 10, shape).astype('float32') input = np.random.uniform(-10, 10, shape).astype('float32')
target = np.random.uniform(-10, 10, shape).astype('float32') target = np.random.uniform(-10, 10, shape).astype('float32')
# 'batchmean' reduction, loss shape will be [N] # 'batchmean' reduction, loss shape will be [N]
pred_loss = F.kl_div(paddle.to_variable(input), pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_variable(target), reduction='batchmean') paddle.to_tensor(target), reduction='batchmean')
# shape=[5] # shape=[5]
# 'mean' reduction, loss shape will be [1] # 'mean' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input), pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_variable(target), reduction='mean') paddle.to_tensor(target), reduction='mean')
# shape=[1] # shape=[1]
# 'sum' reduction, loss shape will be [1] # 'sum' reduction, loss shape will be [1]
pred_loss = F.kl_div(paddle.to_variable(input), pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_variable(target), reduction='sum') paddle.to_tensor(target), reduction='sum')
# shape=[1] # shape=[1]
# 'none' reduction, loss shape is same with input shape # 'none' reduction, loss shape is same with input shape
pred_loss = F.kl_div(paddle.to_variable(input), pred_loss = F.kl_div(paddle.to_tensor(input),
paddle.to_variable(target), reduction='none') paddle.to_tensor(target), reduction='none')
# shape=[5, 20] # shape=[5, 20]
""" """
......
...@@ -18,124 +18,146 @@ from ...fluid.layers import pool3d #DEFINE_ALIAS ...@@ -18,124 +18,146 @@ from ...fluid.layers import pool3d #DEFINE_ALIAS
from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool2d #DEFINE_ALIAS
from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS from ...fluid.layers import adaptive_pool3d #DEFINE_ALIAS
from ...fluid import core from ...fluid import core
from ...fluid.framework import in_dygraph_mode, convert_np_dtype_to_dtype_ from ...fluid.framework import in_dygraph_mode
from ...fluid.layers import utils, LayerHelper from ...fluid.layers import utils, LayerHelper, unsqueeze, squeeze
from ...fluid.data_feeder import check_type, check_variable_and_dtype, check_type, check_dtype, convert_dtype from ...fluid.data_feeder import check_type, check_variable_and_dtype
from ...fluid.layers import unsqueeze, squeeze
__all__ = [ __all__ = [
'pool2d', 'pool2d',
'pool3d', 'pool3d',
'adaptive_pool2d',
'adaptive_pool3d',
'avg_pool1d', 'avg_pool1d',
'avg_pool2d',
'avg_pool3d',
'max_pool1d', 'max_pool1d',
'max_pool2d',
'max_pool3d',
'adaptive_avg_pool1d', 'adaptive_avg_pool1d',
'adaptive_max_pool1d',
'adaptive_avg_pool2d', 'adaptive_avg_pool2d',
'adaptive_avg_pool3d', 'adaptive_avg_pool3d',
'adaptive_pool2d', 'adaptive_max_pool1d',
'adaptive_pool3d', 'adaptive_max_pool2d',
'max_pool2d', 'adaptive_max_pool3d',
'avg_pool2d',
'max_pool3d',
'avg_pool3d',
] ]
def check_input(x, dimension): def _is_list_or_tuple(input):
return isinstance(input, (list, tuple))
def _check_input(x, dimension):
if len(x.shape) != dimension: if len(x.shape) != dimension:
raise ValueError("Excepted Input X is 3-D tensor, but received {}-D {}". raise ValueError(
format(len(x.shape), type(x))) "Expected Input X is {}-D tensor, but received {}-D {}".format(
dimension, len(x.shape), type(x)))
def check_instance(x, x_name, types=(int, float)): def _check_instance(x, x_name, types=(int, float)):
if not isinstance(x, types): if not isinstance(x, types):
raise ValueError("Excepted {} type for {} but received type: {}. ". raise ValueError("Excepted {} type for {} but received type: {}. ".
format(types, x_name, type(x))) format(types, x_name, type(x)))
def update_padding1d(padding, pool_type='avg'): def _zero_padding_in_batch_and_channel(padding, channel_last):
def is_list_or_tuple(ele): if channel_last:
if isinstance(ele, list) or isinstance(ele, tuple): return list(padding[0]) == [0, 0] and list(padding[-1]) == [0, 0]
return True
return False
if is_list_or_tuple(padding):
if padding.__len__() == 1 and not is_list_or_tuple(padding[0]):
return [0, padding[0]]
else:
raise ValueError(
"{}_pool1d() argument 'padding' should contain one int (got {})".
format(pool_type, padding.__len__()))
else: else:
padding = [0, padding] return list(padding[0]) == [0, 0] and list(padding[1]) == [0, 0]
return padding
def _exclude_padding_in_batch_and_channel(padding, channel_last):
padding_ = padding[1:-1] if channel_last else padding[2:]
padding_ = [elem for pad_a_dim in padding_ for elem in pad_a_dim]
return padding_
def update_padding2d(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, list) or isinstance(ele, tuple):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 4: def _channel_last(data_format, num_dims):
if is_list_or_tuple(padding[0]) and (data_format == "NCHW"): if num_dims == 1:
if not (padding[0] == [0, 0] and padding[1] == [0, 0]): if data_format not in ['NCL', 'NLC']:
raise ValueError( raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions " "Attr(data_format) should be 'NCL' or 'NLC'. Received "
"is not supported." % str(padding)) "Attr(data_format): %s" % str(data_format))
padding = padding[2:4] else:
padding = [ele for a_list in padding for ele in a_list] return True if data_format == "NLC" else False
elif is_list_or_tuple(padding[0]) and (data_format == "NHWC"): if num_dims == 2:
if not (padding[0] == [0, 0] and padding[3] == [0, 0]): if data_format not in ['NCHW', 'NHWC']:
raise ValueError( raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions " "Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"is not supported." % str(padding)) "Attr(data_format): %s" % str(data_format))
padding = padding[1:3]
padding = [ele for a_list in padding for ele in a_list]
padding = utils.convert_to_list(padding, 4, 'padding')
if utils._is_symmetric_padding(padding, 2):
padding = [padding[0], padding[2]]
else: else:
padding = utils.convert_to_list(padding, 2, 'padding') return True if data_format == "NHWC" else False
if num_dims == 3:
return padding if data_format not in ['NCDHW', 'NDHWC']:
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
else:
return True if data_format == "NDHWC" else False
def update_padding3d(padding, data_format):
def is_list_or_tuple(ele):
if isinstance(ele, (list, tuple)):
return True
return False
if is_list_or_tuple(padding) and len(padding) == 5: def _update_padding_nd(padding, num_dims, channel_last=False, ceil_mode=False):
if is_list_or_tuple(padding[0]) and (data_format == "NCDHW"): if isinstance(padding, str):
if not (padding[0] == [0, 0] and padding[1] == [0, 0]): padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError( raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions " "Unknown padding: '{}'. It can only be 'SAME' or 'VALID'.".
"is not supported." % str(padding)) format(padding))
padding = padding[2:5] if padding == "VALID":
padding = [ele for a_list in padding for ele in a_list] if ceil_mode != False:
elif is_list_or_tuple(padding[0]) and (data_format == "NDHWC"):
if not (padding[0] == [0, 0] and padding[4] == [0, 0]):
raise ValueError( raise ValueError(
"Non-zero pool_padding(%s) in the batch or channel dimensions " "When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"is not supported." % str(padding)) "Received ceil_mode: True.")
padding = padding[1:4]
padding = [ele for a_list in padding for ele in a_list] padding_algorithm = "VALID"
padding = utils.convert_to_list(padding, 6, 'padding') padding = [0] * num_dims
if utils._is_symmetric_padding(padding, 3): else:
padding = [padding[0], padding[2], padding[4]] padding_algorithm = "SAME"
padding = [0] * num_dims
elif is_list_or_tuple(padding) and len(padding) == 6: elif _is_list_or_tuple(padding):
padding = utils.convert_to_list(padding, 6, 'padding') # for padding like
if utils._is_symmetric_padding(padding, 3): # [(pad_before, pad_after), (pad_before, pad_after), ...]
padding = [padding[0], padding[2], padding[4]] # padding for batch_dim and channel_dim included
if len(padding) == 2 + num_dims and _is_list_or_tuple(padding[0]):
if not _zero_padding_in_batch_and_channel(padding, channel_last):
raise ValueError(
"Non-zero padding({}) in the batch or channel dimensions "
"is not supported.".format(padding))
padding_algorithm = "EXPLICIT"
padding = _exclude_padding_in_batch_and_channel(padding,
channel_last)
if utils._is_symmetric_padding(padding, num_dims):
padding = padding[0::2]
# for padding like [pad_before, pad_after, pad_before, pad_after, ...]
elif len(padding) == 2 * num_dims and isinstance(padding[0], int):
padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, 2 * num_dims, 'padding')
if utils._is_symmetric_padding(padding, num_dims):
padding = padding[0::2]
# for padding like [pad_d1, pad_d2, ...]
elif len(padding) == num_dims and isinstance(padding[0], int):
padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, num_dims, 'padding')
else:
raise ValueError("Invalid padding: {}".format(padding))
# for integer padding
else: else:
padding = utils.convert_to_list(padding, 3, 'padding') padding_algorithm = "EXPLICIT"
padding = utils.convert_to_list(padding, num_dims, 'padding')
return padding, padding_algorithm
def _expand_low_nd_padding(padding):
#1d to 2d fake input
if len(padding) == 2:
padding = [0] * 2 + padding
elif len(padding) == 1:
padding = [0] + padding
else:
raise ValueError(
"The size of padding's dimmention should be 1 or 2. But got padding={}".
format(padding))
return padding return padding
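Taken together, _update_padding_nd and _expand_low_nd_padding turn every accepted padding spec into the explicit pad list plus padding_algorithm that the underlying pool2d kernel expects. The standalone sketch below walks through the 1-D case with a deliberately simplified copy of that logic (the nested per-dimension form with [0, 0] for batch and channel is omitted for brevity), so it runs without Paddle:

def update_padding_1d(padding):
    # string form -> algorithm plus zero explicit padding
    if isinstance(padding, str):
        return [0], padding.upper()              # "SAME" / "VALID"
    # int or flat list/tuple form -> explicit padding
    pads = list(padding) if isinstance(padding, (list, tuple)) else [padding]
    if len(pads) == 2 and pads[0] == pads[1]:
        pads = pads[:1]                          # symmetric [before, after] collapses
    return pads, "EXPLICIT"

def expand_low_nd(padding):
    # prepend zeros for the fake H dimension used by the pool2d kernel
    return [0, 0] + padding if len(padding) == 2 else [0] + padding

for spec in ["same", "valid", 3, [2], [1, 1], [1, 2]]:
    pads, algo = update_padding_1d(spec)
    print(spec, "->", expand_low_nd(pads), algo)
# same -> [0, 0] SAME      valid -> [0, 0] VALID    3     -> [0, 3] EXPLICIT
# [2]  -> [0, 2] EXPLICIT  [1, 1] -> [0, 1] EXPLICIT [1, 2] -> [0, 0, 1, 2] EXPLICIT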
...@@ -147,72 +169,56 @@ def avg_pool1d(x, ...@@ -147,72 +169,56 @@ def avg_pool1d(x,
ceil_mode=False, ceil_mode=False,
name=None): name=None):
""" """
This API implements average pooling 1d operation.
This operation applies a 1D average pooling over an input signal composed See more details in :ref:`api_nn_pooling_AvgPool1d` .
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
.. math::
Output(N_i, C_i, l) &= mean(Input[N_i, C_i, stride \times l:stride \times l+k])
Args: Args:
x (Tensor): The input tensor of pooling operator which is a 3-D tensor with x (Tensor): The input tensor of pooling operator which is a 3-D tensor with
shape [N, C, L]. where `N` is batch size, `C` is the number of channels, shape [N, C, L]. where `N` is batch size, `C` is the number of channels,
`L` is the length of the feature. The data type if float32 or float64. `L` is the length of the feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers. it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers. it must contain an integer.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 1. A string in ['valid', 'same'].
it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, 2. An int, which means the feature map is zero padded by size of `padding` on every side.
then the input is implicitly zero-padded on both sides for padding number of points. 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
count_include_pad (bool): Whether to exclude padding points in average pooling count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`. mode, default is `True`.
ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. Default False If it is set to False, the floor function will be used. The default value is False.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1. ValueError: If `padding` is a list or tuple but its length is greater than 1.
ShapeError: If the input is not a 3-D. ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
import numpy as np
paddle.disable_static() paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0) out = F.avg_pool1d(data, kernel_size=2, stride=2, padding=0)
# pool_out shape: [1, 3, 16] # out shape: [1, 3, 16]
""" """
"""NCL to NCHW""" """NCL to NCHW"""
data_format = "NCHW" data_format = "NCHW"
check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'avg_pool1d') check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool1d')
check_input(x, 3) _check_input(x, 3)
x = unsqueeze(x, [2]) x = unsqueeze(x, [2])
kernel_size = utils.convert_to_list(kernel_size, 1, 'pool_size') kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size')
kernel_size = [1] + kernel_size kernel_size = [1] + kernel_size
if stride is None: if stride is None:
stride = kernel_size stride = kernel_size
...@@ -220,33 +226,20 @@ def avg_pool1d(x, ...@@ -220,33 +226,20 @@ def avg_pool1d(x,
stride = utils.convert_to_list(stride, 1, 'pool_stride') stride = utils.convert_to_list(stride, 1, 'pool_stride')
stride = [1] + stride stride = [1] + stride
padding_algorithm = "EXPLICIT" channel_last = _channel_last("NCL", 1)
if isinstance(padding, str): padding, padding_algorithm = _update_padding_nd(
padding = padding.upper() padding, 1, channel_last=channel_last, ceil_mode=ceil_mode)
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0]
padding = update_padding1d(padding, "avg") # use 2d to implenment 1d should expand padding in advance.
padding = _expand_low_nd_padding(padding)
if in_dygraph_mode(): if in_dygraph_mode():
output = core.ops.pool2d( output = core.ops.pool2d(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
False, 'strides', stride, 'paddings', padding, 'padding_algorithm', False, 'strides', stride, 'paddings', padding, 'padding_algorithm',
padding_algorithm, 'use_cudnn', not count_include_pad, 'ceil_mode', padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
ceil_mode, 'use_mkldnn', False, 'exclusive', True, 'data_format', 'use_mkldnn', False, 'exclusive', not count_include_pad,
data_format) 'data_format', data_format)
return squeeze(output, [2]) return squeeze(output, [2])
op_type = 'pool2d' op_type = 'pool2d'
...@@ -275,126 +268,103 @@ def avg_pool1d(x, ...@@ -275,126 +268,103 @@ def avg_pool1d(x,
return squeeze(pool_out, [2]) return squeeze(pool_out, [2])
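As a cross-check of the avg_pool1d example above (kernel_size=2, stride=2, padding=0 on a [1, 3, 32] input), the same pooling can be written directly in NumPy; this is an illustration, not how the operator is actually implemented:

# NumPy-only cross-check of the avg_pool1d example above.
import numpy as np

x = np.random.uniform(-1, 1, (1, 3, 32)).astype('float32')
k, s = 2, 2
out_len = (x.shape[-1] - k) // s + 1
out = np.stack([x[..., i * s:i * s + k].mean(axis=-1) for i in range(out_len)], axis=-1)
print(out.shape)  # (1, 3, 16)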
def max_pool1d(x, def avg_pool2d(x,
kernel_size, kernel_size,
stride=None, stride=None,
padding=0, padding=0,
return_indices=False,
ceil_mode=False, ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None): name=None):
""" """
This API implements average pooling 2d operation.
Applies a 1D max pooling over an input signal composed of several input planes based See more details in :ref:`api_nn_pooling_AvgPool2d` .
on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
.. math::
Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])}
Args: Args:
x (Tensor): The input tensor of pooling operator which is a 3-D tensor with x (Tensor): The input tensor of pooling operator which is a 4-D tensor with
shape [N, C, L], where `N` is batch size, `C` is the number of channels, shape [N, C, H, W]. The format of input tensor is `"NCHW"` or
`L` is the length of the feature. The data type if float32 or float64. `"NHWC"`, where `N` is batch size, `C` is the number of channels,
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, `H` is the height of the feature, and `W` is the width of the
it must contain one integers. feature. The data type is float32 or float64.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If it is a tuple or list,
it must contain one integers. it must contain two integers, (kernel_size_Height, kernel_size_Width).
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or Otherwise, the pool kernel size will be a square of an int.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, stride (int|list|tuple): The stride size. If it is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`. it must contain two integers, (stride_Height, stride_Width).
return_indices (bool): Whether return the max indices along with the outputs. default is `False`. Otherwise, the stride size will be a square of an int.
ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
If it is set to False, the floor function will be used. Default False. padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
import numpy as np
paddle.disable_static() paddle.disable_static()
# avg pool2d
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0) out = F.avg_pool2d(x,
# pool_out shape: [1, 3, 16] kernel_size=2,
stride=2, padding=0)
pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True) # out.shape [1, 3, 16, 16]
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
""" """
"""NCL to NCHW""" check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d')
data_format = "NCHW" kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size')
check_variable_and_dtype(x, 'input', ['float32', 'float64'], 'max_pool1d')
check_input(x, 3)
x = unsqueeze(x, [2])
kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size')
if stride is None: if stride is None:
stride = kernel_size stride = kernel_size
else: else:
stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride') stride = utils.convert_to_list(stride, 2, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0]
padding = update_padding1d(padding, 'max') channel_last = _channel_last(data_format, 2)
padding, padding_algorithm = _update_padding_nd(
padding, 2, channel_last, ceil_mode=ceil_mode)
if in_dygraph_mode(): if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index( output = core.ops.pool2d(
x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling',
'paddings', padding, 'padding_algorithm', padding_algorithm, False, 'padding_algorithm', padding_algorithm, 'strides', stride,
'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'paddings', padding, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'exclusive', True, 'data_format', data_format) 'use_mkldnn', False, 'exclusive', not count_include_pad,
return (squeeze(pool_out[0], [2]), squeeze( 'data_format', data_format)
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2]) if divisor_override is None:
return output
else:
_check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1]) / divisor_override
op_type = 'max_pool2d_with_index' op_type = 'pool2d'
helper = LayerHelper(op_type, **locals()) helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype) pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op( helper.append_op(
type=op_type, type=op_type,
inputs={"X": x}, inputs={"X": x},
outputs=outputs, outputs={"Out": pool_out},
attrs={ attrs={
"pooling_type": 'max', "pooling_type": "avg",
"ksize": kernel_size, "ksize": kernel_size,
"global_pooling": False, "global_pooling": False,
"strides": stride, "strides": stride,
...@@ -403,335 +373,211 @@ def max_pool1d(x, ...@@ -403,335 +373,211 @@ def max_pool1d(x,
"use_cudnn": True, "use_cudnn": True,
"ceil_mode": ceil_mode, "ceil_mode": ceil_mode,
"use_mkldnn": False, "use_mkldnn": False,
"exclusive": True, "exclusive": not count_include_pad,
"data_format": data_format, "data_format": data_format,
}) })
return (squeeze(pool_out, [2]), if divisor_override is None:
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) return pool_out
else:
_check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
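The divisor_override branch above only rescales the kernel-size-based average: the pooled value is multiplied by kernel_size[0] * kernel_size[1] and divided by the override, which is equivalent to summing the window and dividing by the override. A tiny NumPy illustration for a single 2x2 window:

# Illustration of the divisor_override arithmetic used above, for one 2x2 window.
import numpy as np

window = np.array([[1.0, 2.0],
                   [3.0, 4.0]])
kernel_area = window.size                 # kH * kW = 4
plain_avg = window.mean()                 # 2.5, what avg_pool2d returns by default
overridden = plain_avg * kernel_area / 8  # divisor_override=8 -> window.sum() / 8 = 1.25
print(plain_avg, overridden)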
def adaptive_avg_pool1d(x, output_size, name=None): def avg_pool3d(x,
kernel_size,
stride=None,
padding=0,
ceil_mode=False,
count_include_pad=False,
divisor_override=None,
data_format="NCDHW",
name=None):
""" """
This API implements average pooling 3d operation.
This operation applies a 1D adaptive average pooling over an input signal composed See more details in :ref:`api_nn_pooling_AvgPool3d` .
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)}
Args: Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with
with shape [N, C, L]. The format of input tensor is NCL, shape [N, C, D, H, W], where `N` represents the batch size, `C` represents
where N is batch size, C is the number of channels, L is the the number of channels, `D`, `H` and `W` represent the depth, height and width of the feature respectively.
length of the feature. The data type is float32 or float64. kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, is a tuple or list, it must contain three integers,
it must contain one int. (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, (stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
divisor_override (int|float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of adaptive average pooling result. The data type is same Tensor: The output tensor of pooling result. The data type is same as input tensor.
as input tensor.
Raises: Raises:
ValueError: 'output_size' should be a integer or list or tuple with length as 1. ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
#
import paddle import paddle
import numpy as np
import paddle.nn.functional as F x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
paddle.disable_static() # avg pool3d
out = paddle.nn.functional.avg_pool3d(
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) x,
pool_out = F.adaptive_average_pool1d(data, output_size=16) kernel_size = 2,
# pool_out shape: [1, 3, 16]) stride = 2,
padding=0)
# out.shape: [1, 3, 16, 16, 16]
""" """
pool_type = 'avg' check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool3d')
check_variable_and_dtype(x, 'input', ['float32', 'float64'], kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
'adaptive_pool2d') if stride is None:
check_input(x, 3) stride = kernel_size
check_type(output_size, 'pool_size', (int), 'adaptive_pool1d') else:
stride = utils.convert_to_list(stride, 3, 'pool_stride')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') channel_last = _channel_last(data_format, 3)
padding, padding_algorithm = _update_padding_nd(
padding, 3, channel_last=channel_last, ceil_mode=ceil_mode)
l_type = "pool2d"
x = unsqueeze(x, [2])
if in_dygraph_mode(): if in_dygraph_mode():
pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize', output = core.ops.pool3d(
pool_size, 'adaptive', True) x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride,
return squeeze(pool_out, [2]) 'paddings', padding, 'global_pooling', False, 'padding_algorithm',
padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
_check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
helper = LayerHelper(l_type, **locals()) op_type = "pool3d"
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype) pool_out = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out} outputs = {"Out": pool_out}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
})
return squeeze(pool_out, [2])
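The adaptive pooling formulas quoted in the docstrings above, lstart = floor(i * L / m) and lend = ceil((i + 1) * L / m), split the input length L into m possibly overlapping bins and reduce each one (for the average case the divisor is the bin length, lend - lstart). A NumPy sketch of that bin arithmetic, illustration only:

# Bin boundaries used by adaptive 1-D pooling, plus the per-bin average and max.
import math
import numpy as np

L, m = 32, 5                       # input length and requested output size
x = np.arange(L, dtype='float32')

bins = [(math.floor(i * L / m), math.ceil((i + 1) * L / m)) for i in range(m)]
avg_out = np.array([x[a:b].mean() for a, b in bins])
max_out = np.array([x[a:b].max() for a, b in bins])
print(bins)                          # [(0, 7), (6, 13), (12, 20), (19, 26), (25, 32)]
print(avg_out.shape, max_out.shape)  # (5,) (5,)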
def adaptive_max_pool1d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])}
Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of adaptive pooling result. The data type is same
as input tensor.
Raises:
ValueError: 'output_size' should be a integer or list or tuple with length as 1.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn.functional as F
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16])
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True)
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16]
"""
pool_type = 'max'
check_variable_and_dtype(x, 'input', ['float32', 'float64'],
'adaptive_max_pool1d')
check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
l_type = 'max_pool2d_with_index'
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op( helper.append_op(
type=l_type, type=op_type,
inputs={"X": x}, inputs={"X": x},
outputs=outputs, outputs=outputs,
attrs={ attrs={
"pooling_type": pool_type, "pooling_type": 'avg',
"ksize": pool_size, "ksize": kernel_size,
"adaptive": True, "global_pooling": False,
"strides": stride,
"paddings": padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": not count_include_pad,
"data_format": data_format,
}) })
return (squeeze(pool_out, [2]), if divisor_override is None:
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2]) return pool_out
else:
_check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
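ceil_mode, which appears throughout the pooling signatures above, only changes the rounding step in the usual pooling output-size formula. A plain-Python illustration (the standard formula, not a Paddle-specific helper):

# out = rounding((in + 2*pad - kernel) / stride) + 1, floor by default, ceil when ceil_mode=True.
import math

def pool_out_dim(in_dim, kernel, stride, padding=0, ceil_mode=False):
    rounder = math.ceil if ceil_mode else math.floor
    return rounder((in_dim + 2 * padding - kernel) / stride) + 1

print(pool_out_dim(32, 3, 2))                  # 15
print(pool_out_dim(32, 3, 2, ceil_mode=True))  # 16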
def max_pool2d(x, def max_pool1d(x,
kernel_size, kernel_size,
stride=None, stride=None,
padding=0, padding=0,
return_indices=False, return_indices=False,
ceil_mode=False, ceil_mode=False,
data_format="NCHW",
name=None): name=None):
""" """
This operation applies 2D max pooling over input feature based on the input, This API implements max pooling 1d operation.
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are See more details in :ref:`api_nn_pooling_MaxPool1d` .
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
stride: stride
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$
Args: Args:
x (Tensor): The input tensor of pooling operator which is a 4-D tensor with x (Tensor): The input tensor of pooling operator which is a 3-D tensor with
shape [N, C, H, W]. The format of input tensor is `"NCHW"` or shape [N, C, L], where `N` is batch size, `C` is the number of channels,
`"NHWC"`, where `N` is batch size, `C` is the number of channels, `L` is the length of the feature. The data type if float32 or float64.
`H` is the height of the feature, and `W` is the width of the
feature. The data type if float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width). it must contain an integer.
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width). it must contain an integer.
Otherwise, the pool stride size will be a square of an int. padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or 1. A string in ['valid', 'same'].
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 2. An integer, which means the feature map is zero padded by size of `padding` on every side.
it could be in three forms: `[pad_height, pad_width]` or 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, 4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
when `data_format` is `"NHWC"`, `pool_padding` can be in the form The default value is 0.
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. return_indices (bool): Whether to return the max indices along with the outputs. default is `False`.
Otherwise, the pool padding size will be a square of an int. ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape If it is set to False, the floor function will be used. Default False.
return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
import paddle.nn.functional as F import paddle.nn.functional as F
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
# max pool2d pool_out = F.max_pool1d(data, kernel_size=2, stride=2, padding=0)
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) # pool_out shape: [1, 3, 16]
output = F.max_pool2d(input, pool_out, indices = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True)
kernel_size=2, # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
stride=2, padding=0)
# output.shape [1, 3, 16, 16]
# for return_indices=True
output, max_indices = F.max_pool2d(input,
kernel_size=2,
stride=2,
padding=0,
return_indices=True)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
""" """
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d') """NCL to NCHW"""
kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') data_format = "NCHW"
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool1d')
_check_input(x, 3)
x = unsqueeze(x, [2])
kernel_size = [1] + utils.convert_to_list(kernel_size, 1, 'pool_size')
if stride is None: if stride is None:
stride = kernel_size stride = kernel_size
else: else:
stride = utils.convert_to_list(stride, 2, 'pool_stride') stride = [1] + utils.convert_to_list(stride, 1, 'pool_stride')
if data_format not in ["NCHW", "NHWC"]: padding, padding_algorithm = _update_padding_nd(
raise ValueError( padding, 1, ceil_mode=ceil_mode)
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format))
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
padding = update_padding2d(padding, data_format) # use 2d to implement 1d; expand padding in advance.
padding = _expand_low_nd_padding(padding)
if in_dygraph_mode(): if in_dygraph_mode():
output = core.ops.max_pool2d_with_index( pool_out = core.ops.max_pool2d_with_index(
x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride,
'paddings', padding, 'padding_algorithm', padding_algorithm, 'paddings', padding, 'padding_algorithm', padding_algorithm,
'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False,
'exclusive', True, 'data_format', data_format) 'exclusive', True, 'data_format', data_format)
return output if return_indices else output[0] return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
op_type = 'max_pool2d_with_index' op_type = 'max_pool2d_with_index'
helper = LayerHelper(op_type, **locals()) helper = LayerHelper(op_type, **locals())
...@@ -758,36 +604,21 @@ def max_pool2d(x, ...@@ -758,36 +604,21 @@ def max_pool2d(x,
"data_format": data_format, "data_format": data_format,
}) })
return (pool_out, mask) if return_indices else pool_out return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
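For readers trying the new 1-D pooling path above, a minimal dygraph sketch is given below, assuming the paddle 2.0-beta API introduced in this diff; note that the docstring example above uses `numpy` without importing it, so the import is added here.
.. code-block:: python

    # Minimal sketch of F.max_pool1d as added in this diff (2.0-beta dygraph API assumed).
    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    # NCL input: batch=1, channels=3, length=32
    data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
    out, idx = F.max_pool1d(data, kernel_size=2, stride=2, padding=0, return_indices=True)
    # out and idx are both expected to have shape [1, 3, 16]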
def avg_pool2d(x, def max_pool2d(x,
kernel_size, kernel_size,
stride=None, stride=None,
padding=0, padding=0,
return_indices=False,
ceil_mode=False, ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
""" """
This operation applies 2D average pooling over input features based on the input, This API implements max pooling 2d operation.
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are See more details in :ref:`api_nn_pooling_MaxPool2d` .
in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
$$
Args: Args:
x (Tensor): The input tensor of pooling operator which is a 4-D tensor with x (Tensor): The input tensor of pooling operator which is a 4-D tensor with
...@@ -796,30 +627,26 @@ def avg_pool2d(x, ...@@ -796,30 +627,26 @@ def avg_pool2d(x,
`H` is the height of the feature, and `W` is the width of the `H` is the height of the feature, and `W` is the width of the
feature. The data type is float32 or float64. feature. The data type is float32 or float64.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two integers, (pool_size_Height, pool_size_Width). it must contain two integers, (kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be a square of an int. Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width). it must contain two integers, (stride_Height, stride_Width).
Otherwise, the pool stride size will be a square of an int. Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 1. A string in ['valid', 'same'].
it could be in three forms: `[pad_height, pad_width]` or 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. The default value is 0.
Otherwise, the pool padding size will be a square of an int.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling return_indices (bool): Whether to return the max indices along with the outputs.
mode, default is `true`. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`. `[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: Raises:
...@@ -832,87 +659,71 @@ def avg_pool2d(x, ...@@ -832,87 +659,71 @@ def avg_pool2d(x,
import paddle.nn.functional as F import paddle.nn.functional as F
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
# max pool2d
# avg pool2d x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) out = F.max_pool2d(x,
output = F.avg_pool2d(input,
kernel_size=2, kernel_size=2,
stride=2, padding=0) stride=2, padding=0)
# output.shape [1, 3, 16, 16] # output.shape [1, 3, 16, 16]
# for return_indices=True
out, max_indices = F.max_pool2d(x,
kernel_size=2,
stride=2,
padding=0,
return_indices=True)
# out.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
""" """
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'avg_pool2d') check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool2d')
kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size') kernel_size = utils.convert_to_list(kernel_size, 2, 'pool_size')
if stride is None: if stride is None:
stride = kernel_size stride = kernel_size
else: else:
stride = utils.convert_to_list(stride, 2, 'pool_stride') stride = utils.convert_to_list(stride, 2, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", Attr(ceil_mode) must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0]
if data_format not in ["NCHW", "NHWC"]: if data_format not in ["NCHW", "NHWC"]:
raise ValueError( raise ValueError(
"Attr(data_format) should be 'NCHW' or 'NHWC'. Received " "Attr(data_format) should be 'NCHW' or 'NHWC'. Received "
"Attr(data_format): %s." % str(data_format)) "Attr(data_format): %s." % str(data_format))
pool_padding = update_padding2d(padding, data_format)
channel_last = True if data_format == "NHWC" else False
padding, padding_algorithm = _update_padding_nd(
padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode)
if in_dygraph_mode(): if in_dygraph_mode():
output = core.ops.pool2d( output = core.ops.max_pool2d_with_index(
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'global_pooling', x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride,
False, 'padding_algorithm', padding_algorithm, 'strides', stride, 'paddings', padding, 'padding_algorithm', padding_algorithm,
'paddings', pool_padding, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False,
'use_mkldnn', False, 'exclusive', not count_include_pad, 'exclusive', True, 'data_format', data_format)
'data_format', data_format) return output if return_indices else output[0]
if divisor_override is None:
return output
else:
check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1]) / divisor_override
op_type = 'pool2d' op_type = 'max_pool2d_with_index'
helper = LayerHelper(op_type, **locals()) helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype) pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op( helper.append_op(
type=op_type, type=op_type,
inputs={"X": x}, inputs={"X": x},
outputs={"Out": pool_out}, outputs=outputs,
attrs={ attrs={
"pooling_type": "avg", "pooling_type": 'max',
"ksize": kernel_size, "ksize": kernel_size,
"global_pooling": False, "global_pooling": False,
"strides": stride, "strides": stride,
"paddings": pool_padding, "paddings": padding,
"padding_algorithm": padding_algorithm, "padding_algorithm": padding_algorithm,
"use_cudnn": True, "use_cudnn": True,
"ceil_mode": ceil_mode, "ceil_mode": ceil_mode,
"use_mkldnn": False, "use_mkldnn": False,
"exclusive": not count_include_pad, "exclusive": True,
"data_format": data_format, "data_format": data_format,
}) })
if divisor_override is None: return (pool_out, mask) if return_indices else pool_out
return pool_out
else:
check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1]) / divisor_override
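The output sizes quoted in the max_pool2d docstring ([1, 3, 16, 16] from a 32x32 input with kernel 2 and stride 2) follow the usual pooling output-size rule; the small helper below is written only for illustration (its name is made up) and makes the floor/ceil distinction explicit.
.. code-block:: python

    # Illustrative helper for the pooling output-size rule:
    # floor by default, ceil when ceil_mode=True.
    import math

    def pool_out_size(in_size, kernel, stride, pad, ceil_mode=False):
        rounder = math.ceil if ceil_mode else math.floor
        return rounder((in_size + 2 * pad - kernel) / stride) + 1

    assert pool_out_size(32, kernel=2, stride=2, pad=0) == 16
    assert pool_out_size(33, kernel=2, stride=2, pad=0, ceil_mode=True) == 17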
def max_pool3d(x, def max_pool3d(x,
...@@ -924,47 +735,25 @@ def max_pool3d(x, ...@@ -924,47 +735,25 @@ def max_pool3d(x,
data_format="NCDHW", data_format="NCDHW",
name=None): name=None):
""" """
This operation applies 3D max pooling over input features based on the input, This API implements max pooling 3d operation.
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are See more details in :ref:`api_nn_pooling_MaxPool3d` .
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, D_{in}, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, D_{out}, H_{out}, W_{out})$
$$
\text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, ksize[0]-1} \max_{m=0, \ldots, ksize[1]-1} \max_{n=0, \ldots, ksize[2]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
\text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
$$
Args: Args:
x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with
shape [N, C, D, H, W]. The format of shape [N, C, D, H, W]. The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively.
input tensor is `"NCDHW"` or `"NDHWC"`, where `N` is batch size, `C` is kernel_size (int|list|tuple): The pool kernel size. If the kernel size
the number of channels, `D` is the depth of the feature,
`H` is the height of the feature, and `W` is the width
of the feature.
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
is a tuple or list, it must contain three integers, is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width). (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int. Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
'SAME' which is the padding algorithm. If pool stride size is a tuple or list, it must contain three integers, (stride_Depth, stride_Height, stride_Width).
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int. Otherwise, the pool stride size will be a cube of an int.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
it could be in three forms: `[pad_depth, pad_height, pad_width]` or 1. A string in ['valid', 'same'].
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, 2. An int, which means the feature map is zero padded by size of `padding` on every sides.
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_weight] whose value means the padding size of each dimension.
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. The default value is 0.
ceil_mode (bool): ${ceil_mode_comment} ceil_mode (bool): ${ceil_mode_comment}
return_indices (bool): Whether to return the max indices along with the outputs. return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
...@@ -973,7 +762,6 @@ def max_pool3d(x, ...@@ -973,7 +762,6 @@ def max_pool3d(x,
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of pooling result. The data type is same as input tensor.
Raises: Raises:
...@@ -986,23 +774,20 @@ def max_pool3d(x, ...@@ -986,23 +774,20 @@ def max_pool3d(x,
import paddle.nn.functional as F import paddle.nn.functional as F
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
# max pool3d # max pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output = F.max_pool3d(input, output = F.max_pool3d(x,
kernel_size=2, kernel_size=2,
stride=2, padding=0) stride=2, padding=0)
# output.shape [1, 3, 16, 16, 16] # output.shape [1, 3, 16, 16, 16]
# for return_indices=True # for return_indices=True
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
output, max_indices = paddle.nn.functional.max_pool3d(input, output, max_indices = paddle.nn.functional.max_pool3d(x,
kernel_size = 2, kernel_size = 2,
stride = 2, stride = 2,
padding=0, padding=0,
return_indices=True) return_indices=True)
# output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16], # output.shape [None, 3, 16, 16, 16], max_indices.shape [None, 3, 16, 16, 16],
""" """
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d')
kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size')
...@@ -1011,29 +796,10 @@ def max_pool3d(x, ...@@ -1011,29 +796,10 @@ def max_pool3d(x,
else: else:
stride = utils.convert_to_list(stride, 3, 'pool_stride') stride = utils.convert_to_list(stride, 3, 'pool_stride')
padding_algorithm = "EXPLICIT" channel_last = _channel_last(data_format, 3)
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0, 0]
if data_format not in ["NCDHW", "NDHWC"]: padding, padding_algorithm = _update_padding_nd(
raise ValueError( padding, 3, channel_last=channel_last, ceil_mode=ceil_mode)
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
padding = update_padding3d(padding, data_format)
if in_dygraph_mode(): if in_dygraph_mode():
output = core.ops.max_pool3d_with_index( output = core.ops.max_pool3d_with_index(
...@@ -1071,170 +837,83 @@ def max_pool3d(x, ...@@ -1071,170 +837,83 @@ def max_pool3d(x,
return (pool_out, mask) if return_indices else pool_out return (pool_out, mask) if return_indices else pool_out
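Mirroring the (corrected) example in the max_pool3d docstring, a short dygraph sketch of the 3-D case, assuming the 2.0-beta API shown in this diff:
.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    paddle.disable_static()
    x = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32))
    out, indices = F.max_pool3d(x, kernel_size=2, stride=2, padding=0, return_indices=True)
    # out.shape and indices.shape are both expected to be [1, 3, 16, 16, 16]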
def avg_pool3d(x, def adaptive_avg_pool1d(x, output_size, name=None):
kernel_size,
stride=None,
padding=0,
ceil_mode=False,
count_include_pad=False,
divisor_override=None,
data_format="NCDHW",
name=None):
""" """
This operation applies 3D max pooling over input features based on the input, This API implements adaptive average pooling 1d operation.
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are See more details in :ref:`api_nn_pooling_AdaptiveAvgPool1d` .
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Args: Args:
input (Tensor): The input tensor of pooling operator, which is a 5-D tensor with x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
shape [N, C, D, H, W], where `N` is batch size, `C` is with shape [N, C, L]. The format of input tensor is NCL,
the number of channels, `D` is the depth of the feature, where N is batch size, C is the number of channels, L is the
`H` is the height of the feature, and `W` is the width length of the feature. The data type is float32 or float64.
of the feature. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size it must contain one int.
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of pooling result. The data type is same as input tensor. Tensor: The output tensor of adaptive average pooling result. The data type is same
as input tensor.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: 'output_size' should be an integer or list or tuple with length as 1.
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid # average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divides the L dimension
# of input data into m grids evenly and performs pooling in each
# grid to get the output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
#
import paddle import paddle
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32, 32]).astype(np.float32)) import paddle.nn.functional as F
# avg pool3d paddle.disable_static()
pool3d = paddle.nn.functional.avg_pool3d( data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
input, pool_out = F.adaptive_avg_pool1d(data, output_size=16)
kernel_size = 2, # pool_out shape: [1, 3, 16])
stride = 2,
padding=0)
# pool3d.shape: [1, 3, 16, 16, 16]
""" """
check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'max_pool3d') pool_type = 'avg'
kernel_size = utils.convert_to_list(kernel_size, 3, 'pool_size') check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'adaptive_pool2d')
if stride is None: _check_input(x, 3)
stride = kernel_size check_type(output_size, 'pool_size', (int), 'adaptive_pool1d')
else:
stride = utils.convert_to_list(stride, 3, 'pool_stride')
padding_algorithm = "EXPLICIT"
if isinstance(padding, str):
padding = padding.upper()
if padding not in ["SAME", "VALID"]:
raise ValueError(
"Unknown Attr(pool_padding): '%s'. It can only be 'SAME' or 'VALID'."
% str(padding))
if padding == "VALID":
padding_algorithm = "VALID"
padding = [0, 0, 0]
if ceil_mode != False:
raise ValueError(
"When Attr(pool_padding) is \"VALID\", ceil_mode must be False. "
"Received ceil_mode: True.")
elif padding == "SAME":
padding_algorithm = "SAME"
padding = [0, 0, 0]
if data_format not in ["NCDHW", "NDHWC"]: pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
raise ValueError(
"Attr(data_format) should be 'NCDHW' or 'NDHWC'. Received "
"Attr(data_format): %s" % str(data_format))
padding = update_padding3d(padding, data_format)
l_type = "pool2d"
x = unsqueeze(x, [2])
if in_dygraph_mode(): if in_dygraph_mode():
output = core.ops.pool3d( pool_out = core.ops.pool2d(x, 'pooling_type', pool_type, 'ksize',
x, 'pooling_type', 'avg', 'ksize', kernel_size, 'strides', stride, pool_size, 'adaptive', True)
'paddings', padding, 'global_pooling', False, 'padding_algorithm', return squeeze(pool_out, [2])
padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode,
'use_mkldnn', False, 'exclusive', not count_include_pad,
'data_format', data_format)
if divisor_override is None:
return output
else:
check_instance(divisor_override, "divisor_override")
return output * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
op_type = "pool3d" helper = LayerHelper(l_type, **locals())
helper = LayerHelper(op_type, **locals())
dtype = helper.input_dtype() dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype) pool_out = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out}
outputs = {"Out": pool_out}
helper.append_op( helper.append_op(
type=op_type, type=l_type,
inputs={"X": x}, inputs={"X": x},
outputs=outputs, outputs=outputs,
attrs={ attrs={
"pooling_type": 'avg', "pooling_type": pool_type,
"ksize": kernel_size, "ksize": pool_size,
"global_pooling": False, "adaptive": True,
"strides": stride,
"paddings": padding,
"padding_algorithm": padding_algorithm,
"use_cudnn": True,
"ceil_mode": ceil_mode,
"use_mkldnn": False,
"exclusive": not count_include_pad,
"data_format": data_format,
}) })
if divisor_override is None: return squeeze(pool_out, [2])
return pool_out
else:
check_instance(divisor_override, "divisor_override")
return pool_out * (kernel_size[0] * kernel_size[1] *
kernel_size[2]) / divisor_override
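The lstart/lend bucketing described in the adaptive_avg_pool1d docstring comments can be checked with a few lines of plain Python; the reference below (with a made-up name) is only a sketch of that rule, not the operator itself.
.. code-block:: python

    # Pure-Python reference for the adaptive average pooling rule quoted above.
    import math

    def adaptive_avg_pool1d_ref(row, m):
        L = len(row)
        out = []
        for i in range(m):
            lstart = math.floor(i * L / m)
            lend = math.ceil((i + 1) * L / m)
            out.append(sum(row[lstart:lend]) / (lend - lstart))
        return out

    print(adaptive_avg_pool1d_ref([1.0, 2.0, 3.0, 4.0, 5.0, 6.0], m=3))  # [1.5, 3.5, 5.5]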
def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
""" """
This API implements adaptive average pooling 2d operation.
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions See more details in :ref:`api_nn_pooling_AdaptiveAvgPool2d` .
of the output tensor are determined by the parameter output_size.
See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool2d` .
For avg adaptive pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
Args: Args:
x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor.
...@@ -1248,16 +927,12 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): ...@@ -1248,16 +927,12 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor. Tensor: The output tensor of avg adaptive pool2d result. The data type is same as input tensor.
Raises: Raises:
ValueError: If `data_format` is not "NCHW" or "NHWC". ValueError: If `data_format` is not "NCHW" or "NHWC".
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive avg pool2d # adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n], # suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions # output shape is [N, C, m, n], adaptive pool divide H and W dimensions
...@@ -1279,10 +954,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): ...@@ -1279,10 +954,10 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
input_data = np.random.rand(2, 3, 32, 32) input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data) x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32] # x.shape is [2, 3, 32, 32]
pool_out = paddle.nn.functional.adaptive_avg_pool2d( out = paddle.nn.functional.adaptive_avg_pool2d(
x = x, x = x,
output_size=[3, 3]) output_size=[3, 3])
# pool_out.shape is [2, 3, 3, 3] # out.shape is [2, 3, 3, 3]
""" """
if not in_dygraph_mode(): if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'], check_variable_and_dtype(x, 'x', ['float32', 'float64'],
...@@ -1337,28 +1012,8 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): ...@@ -1337,28 +1012,8 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None):
def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
""" """
This API implements adaptive average pooling 3d operation.
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions See more details in :ref:`api_nn_pooling_AdaptiveAvgPool3d` .
of the output tensor are determined by the parameter output_size.
See more detail in :ref:`api_nn_pooling_AdaptiveAvgPool3d` .
For avg adaptive pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
Args: Args:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor.
...@@ -1372,16 +1027,12 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): ...@@ -1372,16 +1027,12 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns:
Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor. Tensor: The output tensor of avg adaptive pool3d result. The data type is same as input tensor.
Raises: Raises:
ValueError: If `data_format` is not "NCDHW" or "NDHWC". ValueError: If `data_format` is not "NCDHW" or "NDHWC".
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive avg pool3d # adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
...@@ -1406,10 +1057,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): ...@@ -1406,10 +1057,10 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
input_data = np.random.rand(2, 3, 8, 32, 32) input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data) x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32] # x.shape is [2, 3, 8, 32, 32]
pool_out = paddle.nn.functional.adaptive_avg_pool3d( out = paddle.nn.functional.adaptive_avg_pool3d(
x = x, x = x,
output_size=[3, 3, 3]) output_size=[3, 3, 3])
# pool_out.shape is [2, 3, 3, 3, 3] # out.shape is [2, 3, 3, 3, 3]
""" """
if not in_dygraph_mode(): if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'], check_variable_and_dtype(x, 'x', ['float32', 'float64'],
...@@ -1461,3 +1112,257 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): ...@@ -1461,3 +1112,257 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None):
}) })
return pool_out return pool_out
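A compact dygraph sketch combining the two adaptive average pooling examples above, assuming the 2.0-beta API shown in this diff:
.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x2d = paddle.to_tensor(np.random.rand(2, 3, 32, 32).astype(np.float32))
    y2d = paddle.nn.functional.adaptive_avg_pool2d(x2d, output_size=[3, 3])     # expected [2, 3, 3, 3]
    x3d = paddle.to_tensor(np.random.rand(2, 3, 8, 32, 32).astype(np.float32))
    y3d = paddle.nn.functional.adaptive_avg_pool3d(x3d, output_size=[3, 3, 3])  # expected [2, 3, 3, 3, 3]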
def adaptive_max_pool1d(x, output_size, return_indices=False, name=None):
"""
This API implements adaptive max pooling 1d operation.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool1d` .
Args:
x (Tensor): The input tensor of pooling operator, which is a 3-D tensor
with shape [N, C, L]. The format of input tensor is NCL,
where N is batch size, C is the number of channels, L is the
length of the feature. The data type is float32 or float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
Tensor: The output tensor of adaptive pooling result. The data type is same
as input tensor.
Raises:
ValueError: 'output_size' should be an integer or list or tuple with length as 1.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divides the L dimension
# of input data into m grids evenly and performs pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn.functional as F
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
pool_out = F.adaptive_max_pool1d(data, output_size=16)
# pool_out shape: [1, 3, 16]
pool_out, indices = F.adaptive_max_pool1d(data, output_size=16, return_indices=True)
# pool_out shape: [1, 3, 16] indices shape: [1, 3, 16]
"""
pool_type = 'max'
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool1d')
_check_input(x, 3)
check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d')
pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size')
l_type = 'max_pool2d_with_index'
x = unsqueeze(x, [2])
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', pool_type, 'ksize', pool_size, 'adaptive', True)
return (squeeze(pool_out[0], [2]), squeeze(
pool_out[1], [2])) if return_indices else squeeze(pool_out[0], [2])
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": pool_type,
"ksize": pool_size,
"adaptive": True,
})
return (squeeze(pool_out, [2]),
squeeze(mask, [2])) if return_indices else squeeze(pool_out, [2])
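The same bucketing with max instead of mean; a small plain-Python reference (illustrative name only) for the rule spelled out in the adaptive_max_pool1d docstring comments:
.. code-block:: python

    import math

    def adaptive_max_pool1d_ref(row, m):
        # lstart/lend buckets as in the docstring comments, taking the max per bucket
        L = len(row)
        return [max(row[math.floor(i * L / m):math.ceil((i + 1) * L / m)]) for i in range(m)]

    print(adaptive_max_pool1d_ref([1, 5, 2, 8, 3, 7], m=3))  # [5, 8, 7]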
def adaptive_max_pool2d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 2D adaptive max pooling on input tensor.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` .
Args:
x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either a int, or None which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
Returns:
Tensor: The output tensor of adaptive max pool2d result. The data type is same as input tensor.
Examples:
.. code-block:: python
# max adaptive pool2d
# suppose input data in the shape of [N, C, H, W], `output_size` is [m, n]
# output shape is [N, C, m, n], adaptive pool divides the H and W dimensions
# of input data into m*n grids evenly and performs pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(j * W / n)
# wend = ceil((j + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
out = paddle.nn.functional.adaptive_max_pool2d(
x = x,
output_size=[3, 3])
# out.shape is [2, 3, 3, 3]
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool2d')
_check_input(x, 4)
#check_type(output_size, 'pool_size', (int), 'adaptive_max_pool2d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool2d')
in_h, in_w = x.shape[2:4]
if isinstance(output_size, int):
output_size = utils.convert_to_list(output_size, 2, 'output_size')
else:
if output_size[0] == None:
output_size[0] = in_h
if output_size[1] == None:
output_size[1] = in_w
if in_dygraph_mode():
pool_out = core.ops.max_pool2d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
return pool_out if return_indices else pool_out[0]
l_type = 'max_pool2d_with_index'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
})
return (pool_out, mask) if return_indices else pool_out
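A dygraph sketch of adaptive_max_pool2d with return_indices, mirroring the docstring example above; the API is assumed to behave as the dygraph branch of the code suggests.
.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.random.rand(2, 3, 32, 32).astype(np.float32))
    out, indices = paddle.nn.functional.adaptive_max_pool2d(x, output_size=[3, 3], return_indices=True)
    # out.shape and indices.shape are both expected to be [2, 3, 3, 3]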
def adaptive_max_pool3d(x, output_size, return_indices=False, name=None):
"""
This operation applies a 3D adaptive max pooling on input tensor.
See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` .
Args:
x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default.
Returns:
Tensor: The output tensor of adaptive max pool3d result. The data type is same as input tensor.
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n]
# output shape is [N, C, l, m, n], adaptive pool divides the D, H and W dimensions
# of input data into l*m*n grids evenly and performs pooling in each
# grid to get the output.
# adaptive max pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] = max(input[:, :, dstart: dend, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
out = paddle.nn.functional.adaptive_max_pool3d(
x = x,
output_size=[3, 3, 3])
# out.shape is [2, 3, 3, 3, 3]
"""
if not in_dygraph_mode():
check_variable_and_dtype(x, 'x', ['float32', 'float64'],
'adaptive_max_pool3d')
_check_input(x, 5)
#check_type(output_size, 'pool_size', (int), 'adaptive_max_pool3d')
check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool3d')
in_l, in_h, in_w = x.shape[2:5]
if isinstance(output_size, int):
output_size = utils.convert_to_list(output_size, 3, 'output_size')
else:
if output_size[0] == None:
output_size[0] = in_l
if output_size[1] == None:
output_size[1] = in_h
if output_size[2] == None:
output_size[2] = in_w
if in_dygraph_mode():
pool_out = core.ops.max_pool3d_with_index(
x, 'pooling_type', 'max', 'ksize', output_size, 'adaptive', True)
return pool_out if return_indices else pool_out[0]
l_type = 'max_pool3d_with_index'
helper = LayerHelper(l_type, **locals())
dtype = helper.input_dtype()
pool_out = helper.create_variable_for_type_inference(dtype)
mask = helper.create_variable_for_type_inference(dtype)
outputs = {"Out": pool_out, "Mask": mask}
helper.append_op(
type=l_type,
inputs={"X": x},
outputs=outputs,
attrs={
"pooling_type": 'max',
"ksize": output_size,
"adaptive": True,
})
return (pool_out, mask) if return_indices else pool_out
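A short sketch of the `None` handling implemented above: a `None` entry in output_size keeps the corresponding input size (2.0-beta dygraph behaviour assumed).
.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.random.rand(2, 3, 8, 32, 32).astype(np.float32))
    out = paddle.nn.functional.adaptive_max_pool3d(x, output_size=[None, 3, 3])
    # the None dimension keeps the input depth, so out is expected to be [2, 3, 8, 3, 3]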
...@@ -66,16 +66,18 @@ from .common import Dropout #DEFINE_ALIAS ...@@ -66,16 +66,18 @@ from .common import Dropout #DEFINE_ALIAS
from .common import Dropout2D #DEFINE_ALIAS from .common import Dropout2D #DEFINE_ALIAS
from .common import Dropout3D #DEFINE_ALIAS from .common import Dropout3D #DEFINE_ALIAS
from .common import AlphaDropout #DEFINE_ALIAS from .common import AlphaDropout #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .pooling import AvgPool1d #DEFINE_ALIAS from .pooling import AvgPool1d #DEFINE_ALIAS
from .pooling import MaxPool1d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .pooling import AvgPool2d #DEFINE_ALIAS from .pooling import AvgPool2d #DEFINE_ALIAS
from .pooling import MaxPool2d #DEFINE_ALIAS
from .pooling import AvgPool3d #DEFINE_ALIAS from .pooling import AvgPool3d #DEFINE_ALIAS
from .pooling import MaxPool1d #DEFINE_ALIAS
from .pooling import MaxPool2d #DEFINE_ALIAS
from .pooling import MaxPool3d #DEFINE_ALIAS from .pooling import MaxPool3d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool1d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool2d #DEFINE_ALIAS
from .pooling import AdaptiveAvgPool3d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool1d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool2d #DEFINE_ALIAS
from .pooling import AdaptiveMaxPool3d #DEFINE_ALIAS
from .conv import Conv1d #DEFINE_ALIAS from .conv import Conv1d #DEFINE_ALIAS
from .conv import Conv2d #DEFINE_ALIAS from .conv import Conv2d #DEFINE_ALIAS
from .conv import Conv3d #DEFINE_ALIAS from .conv import Conv3d #DEFINE_ALIAS
......
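The reordered import list above exposes the pooling Layer classes from paddle.nn; a hedged usage sketch follows, where the constructor arguments are assumed from the functional API in this diff and may differ in other releases.
.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    x = paddle.to_tensor(np.random.rand(2, 3, 32, 32).astype(np.float32))
    pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)    # constructor args assumed
    adaptive = nn.AdaptiveMaxPool2d(output_size=[3, 3])        # constructor args assumed
    print(pool(x).shape)       # expected [2, 3, 16, 16]
    print(adaptive(x).shape)   # expected [2, 3, 3, 3]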
...@@ -99,7 +99,8 @@ class _ConvNd(layers.Layer): ...@@ -99,7 +99,8 @@ class _ConvNd(layers.Layer):
raise ValueError("in_channels must be divisible by groups.") raise ValueError("in_channels must be divisible by groups.")
if padding_mode in {'reflect', 'replicate', 'circular'}: if padding_mode in {'reflect', 'replicate', 'circular'}:
_paired_padding = utils.convert_to_list(padding, 2, 'padding') _paired_padding = utils.convert_to_list(padding, dims,
'padding')
self._reversed_padding_repeated_twice = _reverse_repeat_list( self._reversed_padding_repeated_twice = _reverse_repeat_list(
_paired_padding, 2) _paired_padding, 2)
...@@ -318,62 +319,80 @@ class Conv2d(_ConvNd): ...@@ -318,62 +319,80 @@ class Conv2d(_ConvNd):
output of the convolution, and the corresponding activation function is output of the convolution, and the corresponding activation function is
applied to the final result. applied to the final result.
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
.. math:: .. math::
Out = \\sigma (W \\ast X + b)
Out = \sigma (W \\ast X + b)
Where: Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format. * :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation. * :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function. * :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters: Parameters:
in_channels(int): The number of channels in the input image. in_channels(int): The number of input channels in the input image.
out_channels(int): The number of channels produced by convolution. out_channels(int): The number of output channels produced by the convolution.
kernel_size (int|list|tuple): The size of convolution kernel. kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain two integers, (stride_H, stride_W). Otherwise, the contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1. stride_H = stride_W = stride. The default value is 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same']. 1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding` on both sides 2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...]. 3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions. 4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0. The default value is 0.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` . dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must contain two integers, (dilation_H, dilation_W). Otherwise, the
contain two integers, (dilation_H, dilation_W). Otherwise, the dilation_H = dilation_W = dilation. The default value is 1.
dilation_H = dilation_W = dilation. Default: 1. groups(int, optional): The groups number of the Conv2d Layer. According to grouped
groups (int, optional): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2, convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: 1. connected to the second half of the input channels. The default value is 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv2d. If it is set to None or one attribute of ParamAttr, conv2d of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If it is set to None, the parameter
is not set, the parameter is initialized with :math:`Normal(0.0, std)`, is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None. :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d.
If it is set to False, no bias will be added to the output units. If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d If it is set to None or one attribute of ParamAttr, conv2d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None. is not set, the bias is initialized zero. The default value is None.
data_format (str, optional): Data format that specifies the layout of input. data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW". It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute: Attribute:
**weight** (Parameter): the learnable weights of filter of this layer. **weight** (Parameter): the learnable weights of filter of this layer.
**bias** (Parameter or None): the learnable bias of this layer. **bias** (Parameter or None): the learnable bias of this layer.
Shape: Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})` - x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})` - output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel_size[0] - 1) + 1))}{strides[0]} + 1 \\\\
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel_size[1] - 1) + 1))}{strides[1]} + 1 H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
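The H_out/W_out expressions in the Conv2d docstring above can be evaluated directly; the helper below is only a reference calculation for those formulas, not part of the diff, and its name is made up for this sketch.
.. code-block:: python

    def conv2d_out_size(in_size, kernel, stride=1, padding=0, dilation=1):
        # floor((in + 2*pad - (dilation*(kernel - 1) + 1)) / stride) + 1
        return (in_size + 2 * padding - (dilation * (kernel - 1) + 1)) // stride + 1

    # a 3x3 kernel with stride 1 and padding 1 preserves the spatial size
    assert conv2d_out_size(32, kernel=3, stride=1, padding=1) == 32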
...@@ -646,35 +665,29 @@ class ConvTranspose2d(_ConvNd): ...@@ -646,35 +665,29 @@ class ConvTranspose2d(_ConvNd):
The details of convolution transpose layer, please refer to the following explanation and references The details of convolution transpose layer, please refer to the following explanation and references
`conv2dtranspose <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_ . `conv2dtranspose <http://www.matthewzeiler.com/wp-content/uploads/2017/07/cvpr2010.pdf>`_ .
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
.. math:: .. math::
Out = \sigma (W \\ast X + b) Out = \sigma (W \\ast X + b)
Where: Where:
* :math:`X`: Input value, a ``Tensor`` with NCHW format. * :math:`X`: Input value, a ``Tensor`` with NCHW format.
* :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] .
* :math:`\\ast`: Convolution operation. * :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1].
* :math:`\\sigma`: Activation function. * :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
- Input:
Input shape: :math:`(N, C_{in}, H_{in}, W_{in})`
Filter shape: :math:`(C_{in}, C_{out}, H_f, W_f)`
- Output:
Output shape: :math:`(N, C_{out}, H_{out}, W_{out})`
Where
.. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (H_f - 1) + 1 \\\\
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (W_f - 1) + 1 \\\\
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] ) \\\\
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
Parameters: Parameters:
in_channels(int): The number of channels in the input image. in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution. out_channels(int): The number of channels produced by the convolution.
kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple, kernel_size(int|list|tuple): The kernel size. If kernel_size is a tuple,
it must contain two integers, (kernel_size_H, kernel_size_W). it must contain two integers, (kernel_size_H, kernel_size_W).
Otherwise, the kernel will be a square. Otherwise, the kernel will be a square.
output_padding(int|list|tuple, optional): Additional size added to one side stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
of each dimension in the output shape. Default: 0. contain two integers, (stride_H, stride_W). Otherwise, the
stride_H = stride_W = stride. Default: 1.
padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same']. 1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding` on both sides 2. an int, which means each spatial dimension (height, width) is zero padded by size of `padding` on both sides
...@@ -682,9 +695,8 @@ class ConvTranspose2d(_ConvNd): ...@@ -682,9 +695,8 @@ class ConvTranspose2d(_ConvNd):
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions. 4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0. The default value is 0.
stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must output_padding(int|list|tuple, optional): Additional size added to one side
contain two integers, (stride_H, stride_W). Otherwise, the of each dimension in the output shape. Default: 0.
stride_H = stride_W = stride. Default: 1.
dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain two integers, (dilation_H, dilation_W). Otherwise, the contain two integers, (dilation_H, dilation_W). Otherwise, the
dilation_H = dilation_W = dilation. Default: 1. dilation_H = dilation_W = dilation. Default: 1.
...@@ -694,29 +706,46 @@ class ConvTranspose2d(_ConvNd): ...@@ -694,29 +706,46 @@ class ConvTranspose2d(_ConvNd):
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
Default: 1. Default: 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) weight_attr(ParamAttr, optional): The parameter attribute for learnable weights(Parameter)
of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose of conv2d_transpose. If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. Default: None. is not set, the parameter is initialized with Xavier. Default: None.
bias_attr (ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose. bias_attr(ParamAttr|bool, optional): The attribute for the bias of conv2d_transpose.
If it is set to False, no bias will be added to the output units. If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv2d_transpose If it is set to None or one attribute of ParamAttr, conv2d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. Default: None. is not set, the bias is initialized zero. Default: None.
data_format (str, optional): Data format that specifies the layout of input. data_format(str, optional): Data format that specifies the layout of input.
It can be "NCHW" or "NHWC". Default: "NCHW". It can be "NCHW" or "NHWC". Default: "NCHW".
Attribute: Attribute:
**weight** (Parameter): the learnable weights of filters of this layer. **weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter or None): the learnable bias of this layer. **bias** (Parameter or None): the learnable bias of this layer.
Shape: Shape:
- x: :math:`(N, C_{in}, H_{in}, W_{in})` - x: :math:`(N, C_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, H_{out}, W_{out})` - output: :math:`(N, C_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
H^\prime_{out} &= (H_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
H_{out} &\in [ H^\prime_{out}, H^\prime_{out} + strides[0] )
W_{out} &\in [ W^\prime_{out}, W^\prime_{out} + strides[1] )
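To make the interval above concrete, here is a minimal sketch of the output-size arithmetic in plain Python; the helper name is illustrative and not part of the paddle API.
.. code-block:: python

    # Illustrative helper (not a paddle API): valid output-size range of
    # ConvTranspose2d along one spatial dimension, per the formula above.
    def conv_transpose2d_out_range(in_size, stride, padding, dilation, kernel_size):
        out_min = (in_size - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1
        return out_min, out_min + stride  # output size lies in [out_min, out_min + stride)

    # H_in = 8, stride 2, padding 1, dilation 1, kernel 3 -> H_out in [15, 17)
    print(conv_transpose2d_out_range(8, 2, 1, 1, 3))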
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -791,66 +820,86 @@ class Conv3d(_ConvNd): ...@@ -791,66 +820,86 @@ class Conv3d(_ConvNd):
provided, bias is added to the output of the convolution, and the provided, bias is added to the output of the convolution, and the
corresponding activation function is applied to the final result. corresponding activation function is applied to the final result.
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
.. math:: .. math::
Out = \sigma (W \\ast X + b) Out = \sigma (W \\ast X + b)
In the above equation: In the above equation:
* :math:`X`: Input value, a tensor with NCDHW or NDHWC format. * :math:`X`: Input value, a tensor with NCDHW or NDHWC format.
* :math:`W`: Filter value, a tensor with MCDHW format. * :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation. * :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function. * :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Parameters: Parameters:
in_channels(int): The number of input channels in the input image. in_channels(int): The number of input channels in the input image.
out_channels(int): The number of output channels produced by the convolution. out_channels(int): The number of output channels produced by the convolution.
kernel_size (int|list|tuple, optional): The size of the convolving kernel. kernel_size(int|list|tuple, optional): The size of the convolving kernel.
stride (int|list|tuple, optional): The stride size. If stride is a tuple, it must stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must
contain three integers, (stride_D, stride_H, stride_W). Otherwise, the contain three integers, (stride_D, stride_H, stride_W). Otherwise, the
stride_D = stride_H = stride_W = stride. The default value is 1. stride_D = stride_H = stride_W = stride. The default value is 1.
padding (int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms. padding(int|str|tuple|list, optional): The padding size. Padding could be in one of the following forms.
1. a string in ['valid', 'same']. 1. a string in ['valid', 'same'].
2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding` 2. an int, which means each spatial dimension(depth, height, width) is zero padded by size of `padding`
3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...]. 3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...].
4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions. 4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions.
5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0).
The default value is 0. The default value is 0.
dilation (int|list|tuple, optional): The dilation size. If dilation is a tuple, it must dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must
contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the
dilation_D = dilation_H = dilation_W = dilation. The default value is 1. dilation_D = dilation_H = dilation_W = dilation. The default value is 1.
groups (int, optional): The groups number of the Conv3d Layer. According to grouped groups(int, optional): The groups number of the Conv3d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2, convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only of the input channels, while the second half of the filters is only
connected to the second half of the input channels. The default value is 1. connected to the second half of the input channels. The default value is 1.
padding_mode (str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d. If it is set to None or one attribute of ParamAttr, conv3d of conv3d. If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as param_attr. If it is set to None, the parameter will create ParamAttr as param_attr. If it is set to None, the parameter
is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is
:math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d.
If it is set to False, no bias will be added to the output units. If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d If it is set to None or one attribute of ParamAttr, conv3d
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
is not set, the bias is initialized zero. The default value is None. is not set, the bias is initialized zero. The default value is None.
data_format (str, optional): Data format that specifies the layout of input. data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW". It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute: Attribute:
**weight** (Parameter): the learnable weights of filters of this layer. **weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer. **bias** (Parameter): the learnable bias of this layer.
Shape: Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
D_{out}&= \\frac{(D_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1
H_{out}&= \\frac{(H_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1
W_{out}&= \\frac{(W_{in} + 2 * paddings[2] - (dilations[2] * (kernel\_size[2] - 1) + 1))}{strides[2]} + 1
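As a quick sanity check of the formula above, a minimal sketch in plain Python (the helper name is illustrative, not a paddle function):
.. code-block:: python

    # Illustrative helper (not a paddle API): Conv3d output size along one dimension.
    def conv3d_out_size(in_size, padding, dilation, kernel_size, stride):
        return (in_size + 2 * padding - (dilation * (kernel_size - 1) + 1)) // stride + 1

    # D_in=8, H_in=W_in=32, kernel 3, stride 1, padding 0, dilation 1 -> D_out=6, H_out=W_out=30
    print(conv3d_out_size(8, 0, 1, 3, 1), conv3d_out_size(32, 0, 1, 3, 1))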
Raises: Raises:
ValueError: If the shapes of input, filter_size, stride, padding and ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch. groups mismatch.
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np import numpy as np
import paddle import paddle
...@@ -936,17 +985,22 @@ class ConvTranspose3d(_ConvNd): ...@@ -936,17 +985,22 @@ class ConvTranspose3d(_ConvNd):
the output of the convolution, and the corresponding activation function the output of the convolution, and the corresponding activation function
is applied to the final result. is applied to the final result.
For each input :math:`X`, the equation is: For each input :math:`X`, the equation is:
.. math:: .. math::
Out = \sigma (W \\ast X + b) Out = \sigma (W \\ast X + b)
In the above equation: In the above equation:
* :math:`X`: Input value, a tensor with NCDHW format. * :math:`X`: Input value, a tensor with NCDHW format.
* :math:`W`: Filter value, a tensor with MCDHW format. * :math:`W`: Filter value, a tensor with MCDHW format.
* :math:`\\ast`: Convolution operation. * :math:`\\ast`: Convolution operation.
* :math:`b`: Bias value, a 2-D tensor with shape [M, 1]. * :math:`b`: Bias value, a 2-D tensor with shape [M, 1].
* :math:`\\sigma`: Activation function. * :math:`\\sigma`: Activation function.
* :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different.
Example:
**Note**: **Note**:
The conv_transpose3d can be seen as the backward of the conv3d. For conv3d, The conv_transpose3d can be seen as the backward of the conv3d. For conv3d,
when stride > 1, conv3d maps multiple input shapes to the same output shape, when stride > 1, conv3d maps multiple input shapes to the same output shape,
so for conv_transpose3d, when stride > 1, one input shape maps to multiple output shapes. so for conv_transpose3d, when stride > 1, one input shape maps to multiple output shapes.
...@@ -957,6 +1011,7 @@ class ConvTranspose3d(_ConvNd): ...@@ -957,6 +1011,7 @@ class ConvTranspose3d(_ConvNd):
and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must and :math:`H^\prime_{out} + strides[1]`, and the :math:`W_{out}` of the output size must
between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`, between :math:`W^\prime_{out}` and :math:`W^\prime_{out} + strides[2]`,
conv_transpose3d can compute the kernel size automatically. conv_transpose3d can compute the kernel size automatically.
Parameters: Parameters:
in_channels(int): The number of channels in the input image. in_channels(int): The number of channels in the input image.
out_channels(int): The number of channels produced by the convolution. out_channels(int): The number of channels produced by the convolution.
...@@ -985,11 +1040,11 @@ class ConvTranspose3d(_ConvNd): ...@@ -985,11 +1040,11 @@ class ConvTranspose3d(_ConvNd):
first half of the input channels, while the second half of the first half of the input channels, while the second half of the
filters is only connected to the second half of the input channels. filters is only connected to the second half of the input channels.
The default value is 1. The default value is 1.
weight_attr (ParamAttr, optional): The parameter attribute for learnable parameters/weights weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights
of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose of conv3d_transpose. If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as param_attr. If the Initializer of the param_attr will create ParamAttr as param_attr. If the Initializer of the param_attr
is not set, the parameter is initialized with Xavier. The default value is None. is not set, the parameter is initialized with Xavier. The default value is None.
bias_attr (ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose. bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv3d_transpose.
If it is set to False, no bias will be added to the output units. If it is set to False, no bias will be added to the output units.
If it is set to None or one attribute of ParamAttr, conv3d_transpose If it is set to None or one attribute of ParamAttr, conv3d_transpose
will create ParamAttr as bias_attr. If the Initializer of the bias_attr will create ParamAttr as bias_attr. If the Initializer of the bias_attr
...@@ -999,24 +1054,38 @@ class ConvTranspose3d(_ConvNd): ...@@ -999,24 +1054,38 @@ class ConvTranspose3d(_ConvNd):
filter_size, padding, and stride to calculate output_size. filter_size, padding, and stride to calculate output_size.
if output_size and filter_size are specified at the same time, They if output_size and filter_size are specified at the same time, They
should follow the formula above. Default: None. should follow the formula above. Default: None.
data_format (str, optional): Data format that specifies the layout of input. data_format(str, optional): Data format that specifies the layout of input.
It can be "NCDHW" or "NDHWC". Default: "NCDHW". It can be "NCDHW" or "NDHWC". Default: "NCDHW".
Attribute: Attribute:
**weight** (Parameter): the learnable weights of filters of this layer. **weight** (Parameter): the learnable weights of filters of this layer.
**bias** (Parameter): the learnable bias of this layer. **bias** (Parameter): the learnable bias of this layer.
Shape: Shape:
- x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})` - x: :math:`(N, C_{in}, D_{in}, H_{in}, W_{in})`
- output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})` - output: :math:`(N, C_{out}, D_{out}, H_{out}, W_{out})`
Where Where
.. math:: .. math::
D^\prime_{out} &= (D_{in} - 1) * strides[0] - 2 * paddings[0] + dilations[0] * (kernel\_size[0] - 1) + 1
H^\prime_{out} &= (H_{in} - 1) * strides[1] - 2 * paddings[1] + dilations[1] * (kernel\_size[1] - 1) + 1
W^\prime_{out} &= (W_{in} - 1) * strides[2] - 2 * paddings[2] + dilations[2] * (kernel\_size[2] - 1) + 1
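A small sketch of the output_size feasibility check implied by the formula above (plain Python; `is_valid_output_size` is an illustrative name, not a paddle API):
.. code-block:: python

    # Illustrative check (not a paddle API): a requested output size is feasible for
    # ConvTranspose3d iff it lies in [size', size' + stride) for every spatial dim.
    def is_valid_output_size(in_size, stride, padding, dilation, kernel_size, out_size):
        base = (in_size - 1) * stride - 2 * padding + dilation * (kernel_size - 1) + 1
        return base <= out_size < base + stride

    # D_in=4, stride 2, padding 0, dilation 1, kernel 3 -> base 9, so 9 is valid, 11 is not
    print(is_valid_output_size(4, 2, 0, 1, 3, 9), is_valid_output_size(4, 2, 0, 1, 3, 11))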
Raises: Raises:
ValueError: If the shapes of input, filter_size, stride, padding and ValueError: If the shapes of input, filter_size, stride, padding and
groups mismatch. groups mismatch.
Examples: Examples:
.. code-block:: python .. code-block:: python
import numpy as np import numpy as np
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
...@@ -1024,7 +1093,7 @@ class ConvTranspose3d(_ConvNd): ...@@ -1024,7 +1093,7 @@ class ConvTranspose3d(_ConvNd):
paddle.disable_static() paddle.disable_static()
x_var = paddle.to_tensor(x) x_var = paddle.to_tensor(x)
conv = nn.Conv3DTranspose(4, 6, (3, 3, 3)) conv = nn.ConvTranspose3d(4, 6, (3, 3, 3))
y_var = conv(x_var) y_var = conv(x_var)
y_np = y_var.numpy() y_np = y_var.numpy()
print(y_np.shape) print(y_np.shape)
......
...@@ -634,9 +634,12 @@ class KLDivLoss(fluid.dygraph.Layer): ...@@ -634,9 +634,12 @@ class KLDivLoss(fluid.dygraph.Layer):
Default is ``'mean'``. Default is ``'mean'``.
Shape: Shape:
- input (Tensor): (N, *), where * means any number of additional dimensions.
- label (Tensor): (N, *), same shape as input.
- output (Tensor): tensor with shape: [1] by default.
Examples: Examples:
...@@ -646,7 +649,7 @@ class KLDivLoss(fluid.dygraph.Layer): ...@@ -646,7 +649,7 @@ class KLDivLoss(fluid.dygraph.Layer):
import numpy as np import numpy as np
import paddle.nn as nn import paddle.nn as nn
paddle.enable_imperative() paddle.disable_static()
shape = (5, 20) shape = (5, 20)
x = np.random.uniform(-10, 10, shape).astype('float32') x = np.random.uniform(-10, 10, shape).astype('float32')
...@@ -654,26 +657,26 @@ class KLDivLoss(fluid.dygraph.Layer): ...@@ -654,26 +657,26 @@ class KLDivLoss(fluid.dygraph.Layer):
# 'batchmean' reduction, loss shape will be [N] # 'batchmean' reduction, loss shape will be [N]
kldiv_criterion = nn.KLDivLoss(reduction='batchmean') kldiv_criterion = nn.KLDivLoss(reduction='batchmean')
pred_loss = kldiv_criterion(paddle.to_variable(x), pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_variable(target)) paddle.to_tensor(target))
# shape=[5] # shape=[5]
# 'mean' reduction, loss shape will be [1] # 'mean' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='mean') kldiv_criterion = nn.KLDivLoss(reduction='mean')
pred_loss = kldiv_criterion(paddle.to_variable(x), pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_variable(target)) paddle.to_tensor(target))
# shape=[1] # shape=[1]
# 'sum' reduction, loss shape will be [1] # 'sum' reduction, loss shape will be [1]
kldiv_criterion = nn.KLDivLoss(reduction='sum') kldiv_criterion = nn.KLDivLoss(reduction='sum')
pred_loss = kldiv_criterion(paddle.to_variable(x), pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_variable(target)) paddle.to_tensor(target))
# shape=[1] # shape=[1]
# 'none' reduction, loss shape is same with X shape # 'none' reduction, loss shape is same with X shape
kldiv_criterion = nn.KLDivLoss(reduction='none') kldiv_criterion = nn.KLDivLoss(reduction='none')
pred_loss = kldiv_criterion(paddle.to_variable(x), pred_loss = kldiv_criterion(paddle.to_tensor(x),
paddle.to_variable(target)) paddle.to_tensor(target))
# shape=[5, 20] # shape=[5, 20]
""" """
......
...@@ -27,6 +27,7 @@ ...@@ -27,6 +27,7 @@
# TODO: define normalization api # TODO: define normalization api
import six
from ...fluid.dygraph.nn import InstanceNorm from ...fluid.dygraph.nn import InstanceNorm
from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS
...@@ -36,7 +37,6 @@ from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS ...@@ -36,7 +37,6 @@ from ...fluid.dygraph import BatchNorm #DEFINE_ALIAS
from ...fluid.dygraph import SpectralNorm #DEFINE_ALIAS from ...fluid.dygraph import SpectralNorm #DEFINE_ALIAS
from ...fluid.dygraph import layers from ...fluid.dygraph import layers
from ...framework import get_default_dtype, set_default_dtype from ...framework import get_default_dtype, set_default_dtype
from ...fluid.framework import in_dygraph_mode from ...fluid.framework import in_dygraph_mode
...@@ -50,6 +50,7 @@ from ..functional import batch_norm, layer_norm, instance_norm ...@@ -50,6 +50,7 @@ from ..functional import batch_norm, layer_norm, instance_norm
import numpy as np import numpy as np
import numbers import numbers
import warnings import warnings
from ...fluid.dygraph.base import no_grad
__all__ = [ __all__ = [
'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'InstanceNorm', 'BatchNorm', 'GroupNorm', 'LayerNorm', 'SpectralNorm', 'InstanceNorm',
...@@ -566,17 +567,28 @@ class _BatchNormBase(layers.Layer): ...@@ -566,17 +567,28 @@ class _BatchNormBase(layers.Layer):
param_shape = [num_features] param_shape = [num_features]
# create parameter # create parameter
if weight_attr == False:
self.weight = self.create_parameter(
attr=None, shape=param_shape, default_initializer=Constant(1.0))
self.weight.stop_gradient = True
else:
self.weight = self.create_parameter( self.weight = self.create_parameter(
attr=self._weight_attr, attr=self._weight_attr,
shape=param_shape, shape=param_shape,
default_initializer=Constant(1.0)) default_initializer=Constant(1.0))
self.weight.stop_gradient = (self._weight_attr is False) or ( self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
self._weight_attr and self._weight_attr.learning_rate == 0.)
if bias_attr == False:
self.bias = self.create_parameter(
attr=None,
shape=param_shape,
default_initializer=Constant(0.0),
is_bias=True)
self.bias.stop_gradient = True
else:
self.bias = self.create_parameter( self.bias = self.create_parameter(
attr=self._bias_attr, shape=param_shape, is_bias=True) attr=self._bias_attr, shape=param_shape, is_bias=True)
self.bias.stop_gradient = (self._bias_attr is False) or ( self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0.
self._bias_attr and self._bias_attr.learning_rate == 0.)
moving_mean_name = None moving_mean_name = None
moving_variance_name = None moving_variance_name = None
...@@ -611,6 +623,7 @@ class _BatchNormBase(layers.Layer): ...@@ -611,6 +623,7 @@ class _BatchNormBase(layers.Layer):
self._epsilon = epsilon self._epsilon = epsilon
self._fuse_with_relu = False self._fuse_with_relu = False
self._track_running_stats = track_running_stats self._track_running_stats = track_running_stats
self._name = name
def _check_input_dim(self, input): def _check_input_dim(self, input):
raise NotImplementedError("BatchNorm Base error") raise NotImplementedError("BatchNorm Base error")
...@@ -898,7 +911,7 @@ class BatchNorm3d(_BatchNormBase): ...@@ -898,7 +911,7 @@ class BatchNorm3d(_BatchNormBase):
len(input.shape))) len(input.shape)))
class SyncBatchNorm(layers.Layer): class SyncBatchNorm(_BatchNormBase):
""" """
This interface is used to construct a callable object of the ``SyncBatchNorm`` class. This interface is used to construct a callable object of the ``SyncBatchNorm`` class.
It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can It implements the function of the Cross-GPU Synchronized Batch Normalization Layer, and can
...@@ -984,72 +997,16 @@ class SyncBatchNorm(layers.Layer): ...@@ -984,72 +997,16 @@ class SyncBatchNorm(layers.Layer):
def __init__(self, def __init__(self,
num_features, num_features,
epsilon=1e-05,
momentum=0.9, momentum=0.9,
track_running_stats=True, epsilon=1e-05,
weight_attr=None, weight_attr=None,
bias_attr=None, bias_attr=None,
data_format='NCHW', data_format='NCHW',
track_running_stats=True,
name=None): name=None):
super(SyncBatchNorm, self).__init__() super(SyncBatchNorm,
self._weight_attr = weight_attr self).__init__(num_features, momentum, epsilon, weight_attr,
self._bias_attr = bias_attr bias_attr, data_format, track_running_stats, name)
self._num_features = num_features
self._data_layout = data_format
self._momentum = momentum
self._epsilon = epsilon
self._track_running_stats = track_running_stats
if self._track_running_stats == False:
warnings.warn(
"moving mean and moving variance will be calculated whether `track_running_stats` is set to `True` or `False`, we will fix it in the next version."
)
param_shape = [self._num_features]
# create parameter
if weight_attr == False:
self.weight = self.create_parameter(
attr=None, shape=param_shape, default_initializer=Constant(1.0))
self.weight.stop_gradient = True
else:
self.weight = self.create_parameter(
attr=self._weight_attr,
shape=param_shape,
default_initializer=Constant(1.0))
self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
if bias_attr == False:
self.bias = self.create_parameter(
attr=None,
shape=param_shape,
default_initializer=Constant(0.0),
is_bias=True)
self.bias.stop_gradient = True
else:
self.bias = self.create_parameter(
attr=self._bias_attr, shape=param_shape, is_bias=True)
self.bias.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0.
self._mean = self.create_parameter(
attr=ParamAttr(
name=None,
initializer=Constant(0.0),
trainable=False,
do_model_average=True),
shape=param_shape,
dtype=self._dtype)
self._mean.stop_gradient = True
self._variance = self.create_parameter(
attr=ParamAttr(
name=None,
initializer=Constant(1.0),
trainable=False,
do_model_average=True),
shape=param_shape,
dtype=self._dtype)
self._variance.stop_gradient = True
def forward(self, x): def forward(self, x):
# create output # create output
...@@ -1063,7 +1020,7 @@ class SyncBatchNorm(layers.Layer): ...@@ -1063,7 +1020,7 @@ class SyncBatchNorm(layers.Layer):
if in_dygraph_mode(): if in_dygraph_mode():
attrs = ("momentum", self._momentum, "epsilon", self._epsilon, attrs = ("momentum", self._momentum, "epsilon", self._epsilon,
"is_test", not self.training, "data_layout", "is_test", not self.training, "data_layout",
self._data_layout, "use_mkldnn", False, "fuse_with_relu", self._data_format, "use_mkldnn", False, "fuse_with_relu",
False, "use_global_stats", False, 'trainable_statistics', False, "use_global_stats", False, 'trainable_statistics',
False) False)
sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm( sync_batch_norm_out, _, _, _, _, _ = core.ops.sync_batch_norm(
...@@ -1073,13 +1030,13 @@ class SyncBatchNorm(layers.Layer): ...@@ -1073,13 +1030,13 @@ class SyncBatchNorm(layers.Layer):
return sync_batch_norm_out return sync_batch_norm_out
check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'], check_variable_and_dtype(x, 'input', ['float16', 'float32', 'float64'],
'BatchNorm') 'SyncBatchNorm')
attrs = { attrs = {
"momentum": self._momentum, "momentum": self._momentum,
"epsilon": self._epsilon, "epsilon": self._epsilon,
"is_test": not self.training, "is_test": not self.training,
"data_layout": self._data_layout, "data_layout": self._data_format,
"use_mkldnn": False, "use_mkldnn": False,
"fuse_with_relu": False, "fuse_with_relu": False,
"use_global_stats": False, "use_global_stats": False,
...@@ -1112,3 +1069,45 @@ class SyncBatchNorm(layers.Layer): ...@@ -1112,3 +1069,45 @@ class SyncBatchNorm(layers.Layer):
self._helper.append_op( self._helper.append_op(
type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs) type="sync_batch_norm", inputs=inputs, outputs=outputs, attrs=attrs)
return sync_batch_norm_out return sync_batch_norm_out
@classmethod
def convert_sync_batchnorm(cls, layer):
"""
Helper function to convert :class:`paddle.nn.BatchNorm*d` layers in the model to :class:`paddle.nn.SyncBatchNorm` layers.
Parameters:
layer(paddle.nn.Layer): model containing one or more `BatchNorm*d` layers.
Returns:
The original model with its BatchNorm*d layers converted: every BatchNorm*d layer found in the model is replaced by an equivalent SyncBatchNorm layer.
Examples:
.. code-block:: python
import paddle
import paddle.nn as nn
paddle.disable_static()
model = nn.Sequential(nn.Conv2d(3, 5, 3), nn.BatchNorm2d(5))
sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
"""
layer_output = layer
if isinstance(layer, _BatchNormBase):
layer_output = SyncBatchNorm(layer._num_features, layer._momentum,
layer._epsilon, layer._weight_attr,
layer._bias_attr, layer._data_format,
layer._track_running_stats, layer._name)
if layer._weight_attr != False and layer._bias_attr != False:
with no_grad():
layer_output.weight = layer.weight
layer_output.bias = layer.bias
layer_output._mean = layer._mean
layer_output._variance = layer._variance
for name, sublayer in layer.named_sublayers():
layer_output.add_sublayer(name,
cls.convert_sync_batchnorm(sublayer))
del layer
return layer_output
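A short usage sketch of the conversion helper above on a nested model, assuming the 2.0-style paddle.nn layers referenced elsewhere in this diff (nn.Conv2d, nn.BatchNorm2d, nn.ReLU); the model itself is illustrative.
.. code-block:: python

    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    # A nested model: convert_sync_batchnorm recurses into sublayers.
    model = nn.Sequential(
        nn.Conv2d(3, 8, 3), nn.Sequential(nn.BatchNorm2d(8), nn.ReLU()))
    sync_model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    # Every BatchNorm2d sublayer should now be a SyncBatchNorm.
    print(any(isinstance(sub, nn.SyncBatchNorm)
              for _, sub in sync_model.named_sublayers()))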
...@@ -12,198 +12,26 @@ ...@@ -12,198 +12,26 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle
from ...fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
from ...fluid.layers import utils
from ...fluid.dygraph import layers from ...fluid.dygraph import layers
from ...fluid.layer_helper import LayerHelper from ...fluid.layer_helper import LayerHelper
from .. import functional as F from .. import functional as F
__all__ = [ __all__ = [
'AdaptiveAvgPool2d',
'AdaptiveAvgPool3d',
'AvgPool1d', 'AvgPool1d',
'maxPool1d',
'AdaptiveMaxPool1d',
'AdaptiveAvgPool1d',
'AvgPool2d', 'AvgPool2d',
'MaxPool2d',
'AvgPool3d', 'AvgPool3d',
'MaxPool1d',
'MaxPool2d',
'MaxPool3d', 'MaxPool3d',
'AdaptiveAvgPool1d',
'AdaptiveAvgPool2d',
'AdaptiveAvgPool3d',
'AdaptiveMaxPool1d',
'AdaptiveMaxPool2d',
'AdaptiveMaxPool3d',
] ]
class AdaptiveAvgPool2d(layers.Layer):
"""
This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain two element, (H, W). H and W can be either a int, or None which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
the order of: [batch_size, input_channels, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32 or float64.
output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool2d.
Examples:
.. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCHW", name=None):
super(AdaptiveAvgPool2d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool2d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
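A common use of this layer is global average pooling with output_size=1; a brief sketch, assuming the paddle API shown above:
.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.random.rand(2, 3, 32, 32).astype('float32'))

    # output_size=1 reduces each feature map to one value per channel,
    # i.e. global average pooling: the result shape is [2, 3, 1, 1].
    global_pool = paddle.nn.AdaptiveAvgPool2d(output_size=1)
    print(global_pool(x).shape)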
class AdaptiveAvgPool3d(layers.Layer):
"""
This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size.
For avg adaptive pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
the order of: [batch_size, input_channels, input_depth, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32 or float64.
output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool3d.
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follow:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCDHW", name=None):
super(AdaptiveAvgPool3d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool3d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
class AvgPool1d(layers.Layer): class AvgPool1d(layers.Layer):
""" """
This operation applies a 1D average pooling over an input signal composed This operation applies a 1D average pooling over an input signal composed
...@@ -223,17 +51,20 @@ class AvgPool1d(layers.Layer): ...@@ -223,17 +51,20 @@ class AvgPool1d(layers.Layer):
Args: Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers. it must contain an integer.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers. it must contain an integer.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 1. A string in ['valid', 'same'].
it could be the following forms: `[pad_left, pad_right]`. If padding is non-zero, 2. An int, which means the feature map is zero padded by size of `padding` on every side.
then the input is implicitly zero-padded on both sides for padding number of points. 3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
count_include_pad (bool): Whether to exclude padding points in average pooling count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`. mode, default is `True`.
ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width. ceil_mode (bool): ${ceil_mode_comment}Whether to use the ceil function to calculate output height and width.
If it is set to False, the floor function will be used. Default False If it is set to False, the floor function will be used. The default value is False.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
...@@ -245,10 +76,14 @@ class AvgPool1d(layers.Layer): ...@@ -245,10 +76,14 @@ class AvgPool1d(layers.Layer):
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1. ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D. ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- input: 3-D tensor.
- output: 3-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
...@@ -284,63 +119,74 @@ class AvgPool1d(layers.Layer): ...@@ -284,63 +119,74 @@ class AvgPool1d(layers.Layer):
return out return out
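The example block for AvgPool1d is elided by this hunk; the following is a minimal usage sketch consistent with the documented signature, not the original example.
.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))

    # kernel 2, stride 2, no padding: the length 32 is halved to 16.
    avg_pool = nn.AvgPool1d(kernel_size=2, stride=2, padding=0)
    out = avg_pool(data)
    # out shape: [1, 3, 16]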
class MaxPool1d(layers.Layer): class AvgPool2d(layers.Layer):
""" """
Applies a 1D max pooling over an input signal composed of several input planes based This operation applies 2D average pooling over input features based on the input,
on the input, output_size, return_indices parameters. and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
Input(X) and output(Out) are in NCL format, where N is batch in NCHW format, where N is batch size, C is the number of channels,
size, C is the number of channels, L is the length of the feature. H is the height of the feature, and W is the width of the feature.
The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For average pool1d:
.. math:: Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])} Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1}
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
$$
Args: Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one integers. it must contain two integers, (pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain one integers. it must contain two integers, (pool_stride_Height, pool_stride_Width).
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or Otherwise, the pool stride size will be a square of an int.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list,
it could be the following forms: `[pad_left, pad_right]`. padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
return_indices (bool): Whether return the max indices along with the outputs. default is `False`. 1. A string in ['valid', 'same'].
ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default. 2. An int, which means the feature map is zero padded by size of `padding` on every side.
If it is set to False, the floor function will be used. Default False 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is `true`.
divisor_override (float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Shape:
None. - x: 4-D tensor.
- out: 4-D tensor.
Returns: None.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length greater than 1.
ShapeError: If the input is not a 3-D.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import numpy as np
paddle.disable_static() paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) # avg pool2d
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0) input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
pool_out = MaxPool1d(data) AvgPool2d = nn.AvgPool2d(kernel_size=2,
# pool_out shape: [1, 3, 16] stride=2, padding=0)
output = AvgPool2d(input)
MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True) # output.shape [1, 3, 16, 16]
pool_out, indices = MaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
""" """
...@@ -348,113 +194,155 @@ class MaxPool1d(layers.Layer): ...@@ -348,113 +194,155 @@ class MaxPool1d(layers.Layer):
kernel_size, kernel_size,
stride=None, stride=None,
padding=0, padding=0,
return_indices=False,
ceil_mode=False, ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW",
name=None): name=None):
super(MaxPool1d, self).__init__() super(AvgPool2d, self).__init__()
self.kernel_size = kernel_size self.ksize = kernel_size
self.stride = stride self.stride = stride
self.padding = padding self.padding = padding
self.ceil_mode = ceil_mode self.ceil_mode = ceil_mode
self.return_indices = return_indices self.count_include_pad = count_include_pad
self.name = name self.divisor = divisor_override
self.data_format = data_format
self.name = name
def forward(self, input): def forward(self, x):
out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding, return F.avg_pool2d(
self.return_indices, self.ceil_mode, self.name) x,
return out kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
data_format=self.data_format,
name=self.name)
class AdaptiveAvgPool1d(layers.Layer): class AvgPool3d(layers.Layer):
""" """
This operation applies 3D average pooling over input features based on the input,
This operation applies a 1D adaptive average pooling over an input signal composed and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
of several input planes, based on the input, output_size, return_indices parameters. in NCDHW format, where N is batch size, C is the number of channels,
Input(X) and output(Out) are in NCL format, where N is batch H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)}
Args: Args:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size
it must contain one int. is a tuple or list, it must contain three integers,
(kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain three integers, (stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An int, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
ceil_mode (bool): When True, will use `ceil` instead of `floor` to compute the output shape. The default value is False.
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
divisor_override (int|float): if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: Returns: None.
None.
Raises: Raises:
ValueError: 'pool_size' should be a integer or list or tuple with length as 1. ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 5-D tensor.
- out: 5-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lstart - lend)
#
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import numpy as np
paddle.disable_static() paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) # avg pool3d
AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16) input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
pool_out = AdaptiveAvgPool1d(data) AvgPool3d = nn.AvgPool3d(kernel_size=2,
# pool_out shape: [1, 3, 16] stride=2, padding=0)
output = AvgPool3d(input)
# output.shape [1, 2, 3, 16, 16]
""" """
def __init__(self, output_size, name=None): def __init__(self,
super(AdaptiveAvgPool1d, self).__init__() kernel_size,
self.output_size = output_size stride,
padding=0,
ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCDHW",
name=None):
super(AvgPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format
self.name = name self.name = name
def forward(self, input): def forward(self, x):
return F.adaptive_avg_pool1d(input, self.output_size, self.name) return F.avg_pool3d(
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
data_format=self.data_format,
name=self.name)
class AdaptiveMaxPool1d(layers.Layer): class MaxPool1d(layers.Layer):
""" """
Applies a 1D max pooling over an input signal composed of several input planes based
This operation applies a 1D adaptive max pooling over an input signal composed on the input, output_size, return_indices parameters.
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature. size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d: The output value of the layer with input size (N, C, L),
output (N, C, L_{out}) and kernel_size k can be precisely described as
For max pool1d:
.. math:: .. math::
lstart &= floor(i * L_{in} / L_{out}) Output(N_i, C_i, l) &= max(Input[N_i, C_i, stride \times l:stride \times l+k])
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])}
Args: Args:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int. it must contain an integer.
return_indices (bool): If true, the index of max pooling point will be returned along stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
with outputs. It cannot be set in average pooling type. Default False. it must contain an integer.
padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
1. A string in ['valid', 'same'].
2. An integer, which means the feature map is zero padded by size of `padding` on every side.
3. A list[int] or tuple(int) whose length is 1, which means the feature map is zero padded by the size of `padding[0]` on every side.
4. A list[int] or tuple(int) whose length is 2. It has the form [pad_before, pad_after].
5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
The default value is 0.
return_indices (bool): Whether to return the max indices along with the outputs. Default is `False`.
ceil_mode (bool): Whether to use the ceil function to calculate output height and width. False is the default.
If it is set to False, the floor function will be used. Default False.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
...@@ -462,53 +350,60 @@ class AdaptiveMaxPool1d(layers.Layer): ...@@ -462,53 +350,60 @@ class AdaptiveMaxPool1d(layers.Layer):
None. None.
Raises: Raises:
ValueError: 'pool_size' should be a integer or list or tuple with length as 1. ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ValueError: If `padding` is a list or tuple but its length is greater than 1.
ShapeError: If the input is not a 3-D tensor.
ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 3-D tensor.
- out: 3-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follow:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32)) data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16) MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0)
pool_out = AdaptiveMaxPool1d(data) pool_out = MaxPool1d(data)
# pool_out shape: [1, 3, 16] # pool_out shape: [1, 3, 16]
# for return_indices = true MaxPool1d = nn.MaxPool1d(kernel_size=2, stride=2, padding=0, return_indices=True)
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True) pool_out, indices = MaxPool1d(data)
pool_out, indices = AdaptiveMaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16] # pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
""" """
def __init__(self, output_size, return_indices=False, name=None): def __init__(self,
super(AdaptiveMaxPool1d, self).__init__() kernel_size,
self.output_size = output_size stride=None,
padding=0,
return_indices=False,
ceil_mode=False,
name=None):
super(MaxPool1d, self).__init__()
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.ceil_mode = ceil_mode
self.return_indices = return_indices self.return_indices = return_indices
self.name = name self.name = name
def forward(self, input): def forward(self, input):
return F.adaptive_max_pool1d(input, self.output_size, out = F.max_pool1d(input, self.kernel_size, self.stride, self.padding,
self.return_indices, self.name) self.return_indices, self.ceil_mode, self.name)
return out
class AvgPool2d(layers.Layer): class MaxPool2d(layers.Layer):
""" """
This operation applies 2D average pooling over input features based on the input, This operation applies 2D max pooling over input feature based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels, in NCHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature. H is the height of the feature, and W is the width of the feature.
...@@ -522,8 +417,9 @@ class AvgPool2d(layers.Layer): ...@@ -522,8 +417,9 @@ class AvgPool2d(layers.Layer):
Output: Output:
Out shape: $(N, C, H_{out}, W_{out})$ Out shape: $(N, C, H_{out}, W_{out})$
$$ $$
out(N_i, C_j, h, w) = \frac{1}{ksize[0] * ksize[1]} \sum_{m=0}^{ksize[0]-1} \sum_{n=0}^{ksize[1]-1} out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n) & \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$ $$
Args: Args:
...@@ -532,31 +428,33 @@ class AvgPool2d(layers.Layer): ...@@ -532,31 +428,33 @@ class AvgPool2d(layers.Layer):
Otherwise, the pool kernel size will be a square of an int. Otherwise, the pool kernel size will be a square of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width). it must contain two integers, (pool_stride_Height, pool_stride_Width).
Otherwise, the pool stride size will be a square of an int. Default: kernel_size. Otherwise, the pool stride size will be a square of an int.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 1. A string in ['valid', 'same'].
it could be in three forms: `[pad_height, pad_width]` or 2. An int, which means the feature map is zero padded by the size of `padding` on every side.
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, 3. A list[int] or tuple(int) whose length is 2, [pad_height, pad_width] whose value means the padding size of each dimension.
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 4. A list[int] or tuple(int) whose length is 4. [pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. The default value is 0.
Otherwise, the pool padding size will be a square of an int.
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
count_include_pad (bool): Whether to exclude padding points in average pooling return_indices (bool): Whether to return the max indices along with the outputs.
mode, default is `true`.
divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`. `[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns: None. Returns: None
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 4-D tensor.
- out: 4-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -566,95 +464,87 @@ class AvgPool2d(layers.Layer): ...@@ -566,95 +464,87 @@ class AvgPool2d(layers.Layer):
# max pool2d # max pool2d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32))
AvgPool2d = nn.AvgPool2d(kernel_size=2, MaxPool2d = nn.MaxPool2d(kernel_size=2,
stride=2, padding=0) stride=2, padding=0)
output = AvgPoo2d(input) output = MaxPool2d(input)
# output.shape [1, 3, 16, 16] # output.shape [1, 3, 16, 16]
# for return_indices=True
MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool2d(input)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16],
""" """
def __init__(self, def __init__(self,
kernel_size, kernel_size,
stride=None, stride=None,
padding=0, padding=0,
return_indices=False,
ceil_mode=False, ceil_mode=False,
count_include_pad=True,
divisor_override=None,
data_format="NCHW", data_format="NCHW",
name=None): name=None):
super(AvgPool2d, self).__init__() super(MaxPool2d, self).__init__()
self.ksize = kernel_size self.ksize = kernel_size
self.stride = stride self.stride = stride
self.padding = padding self.padding = padding
self.return_indices = return_indices
self.ceil_mode = ceil_mode self.ceil_mode = ceil_mode
self.count_include_pad = count_include_pad
self.divisor = divisor_override
self.data_format = data_format self.data_format = data_format
self.name = name self.name = name
def forward(self, x): def forward(self, x):
return F.avg_pool2d( return F.max_pool2d(
x, x,
kernel_size=self.ksize, kernel_size=self.ksize,
stride=self.stride, stride=self.stride,
padding=self.padding, padding=self.padding,
ceil_mode=self.ceil_mode, return_indices=self.return_indices,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor,
data_format=self.data_format, data_format=self.data_format,
name=self.name) name=self.name)
class MaxPool2d(layers.Layer): class MaxPool3d(layers.Layer):
""" """
This operation applies 2D max pooling over input feature based on the input, This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCHW format, where N is batch size, C is the number of channels, in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, and W is the width of the feature. H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Example:
Input:
X shape: $(N, C, H_{in}, W_{in})$
Attr:
kernel_size: ksize
Output:
Out shape: $(N, C, H_{out}, W_{out})$
$$
out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, ksize[0] -1} \max_{n=0, \ldots, ksize[1]-1} \\
& \text{input}(N_i, C_j, \text{stride[0]} \times h + m,
\text{stride[1]} \times w + n)
$$
Args: Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, kernel_size (int|list|tuple): The pool kernel size. If the kernel size
it must contain two integers, (pool_size_Height, pool_size_Width). is a tuple or list, it must contain three integers,
Otherwise, the pool kernel size will be a square of an int. (kernel_size_Depth, kernel_size_Height, kernel_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list, stride (int|list|tuple): The pool stride size. If pool stride size is a tuple or list,
it must contain two integers, (pool_stride_Height, pool_stride_Width). it must contain three integers, (stride_Depth, stride_Height, stride_Width).
Otherwise, the pool stride size will be a square of an int. Default: kernel_size. Otherwise, the pool stride size will be a cube of an int.
padding (string|int|list|tuple): The pool padding. If `pool_padding` is a string, either 'VALID' or padding (string|int|list|tuple): The padding size. Padding could be in one of the following forms.
'SAME' which is the padding algorithm. If pool padding size is a tuple or list, 1. A string in ['valid', 'same'].
it could be in three forms: `[pad_height, pad_width]` or 2. An int, which means the feature map is zero padded by the size of `padding` on every side.
`[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, 3. A list[int] or tuple(int) whose length is 3, [pad_depth, pad_height, pad_width] whose value means the padding size of each dimension.
`pool_padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. 4. A list[int] or tuple(int) whose length is 6. [pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right] whose value means the padding size of each side.
when `data_format` is `"NHWC"`, `pool_padding` can be in the form 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0).
`[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. The default value is 0.
Otherwise, the pool padding size will be a square of an int. ceil_mode (bool): ${ceil_mode_comment}
ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape
return_indices (bool): Whether to return the max indices along with the outputs. return_indices (bool): Whether to return the max indices along with the outputs.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`. `[batch_size, input_channels, input_depth, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: None
Returns:None.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: If `padding` is a string, but not "SAME" or "VALID".
ValueError: If `padding` is "VALID", but `ceil_mode` is True. ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0. ShapeError: If the output's shape calculated is not greater than 0.
Shape:
- x: 5-D tensor.
- out: 5-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
import paddle import paddle
...@@ -662,28 +552,28 @@ class MaxPool2d(layers.Layer): ...@@ -662,28 +552,28 @@ class MaxPool2d(layers.Layer):
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
# max pool2d # max pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32, 32]).astype(np.float32)) input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
MaxPool2d = nn.MaxPool2d(kernel_size=2, MaxPool3d = nn.MaxPool3d(kernel_size=2,
stride=2, padding=0) stride=2, padding=0)
output = MaxPool2d(input) output = MaxPool3d(input)
# output.shape [1, 3, 16, 16] # output.shape [1, 2, 3, 16, 16]
# for return_indices=True # for return_indices=True
MaxPool2d = nn.MaxPool2d(kernel_size=2,stride=2, padding=0, return_indices=True) MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool2d(input) output, max_indices = MaxPool3d(input)
# output.shape [1, 3, 16, 16], max_indices.shape [1, 3, 16, 16], # output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
""" """
def __init__(self, def __init__(self,
kernel_size, kernel_size,
stride=None, stride,
padding=0, padding,
return_indices=False, return_indices=False,
ceil_mode=False, ceil_mode=False,
data_format="NCHW", data_format="NCDHW",
name=None): name=None):
super(MaxPool2d, self).__init__() super(MaxPool3d, self).__init__()
self.ksize = kernel_size self.ksize = kernel_size
self.stride = stride self.stride = stride
self.padding = padding self.padding = padding
...@@ -693,7 +583,7 @@ class MaxPool2d(layers.Layer): ...@@ -693,7 +583,7 @@ class MaxPool2d(layers.Layer):
self.name = name self.name = name
def forward(self, x): def forward(self, x):
return F.max_pool2d( return F.max_pool3d(
x, x,
kernel_size=self.ksize, kernel_size=self.ksize,
stride=self.stride, stride=self.stride,
...@@ -703,175 +593,457 @@ class MaxPool2d(layers.Layer): ...@@ -703,175 +593,457 @@ class MaxPool2d(layers.Layer):
name=self.name) name=self.name)
class MaxPool3d(layers.Layer): class AdaptiveAvgPool1d(layers.Layer):
""" """
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are This operation applies a 1D adaptive average pooling over an input signal composed
in NCDHW format, where N is batch size, C is the number of channels, of several input planes, based on the input, output_size, return_indices parameters.
H is the height of the feature, D is the depth of the feature, and W is the width of the feature. Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For average adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= \\frac{sum(Input[lstart:lend])}{(lend - lstart)}
Args: Args:
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
is a tuple or list, it must contain three integers, it must contain one int.
(pool_size_Depth, pool_size_Height, pool_size_Width).
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`.
Otherwise, the pool stride size will be a cube of an int. Default kernel_size.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list,
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`,
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`.
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`.
ceil_mode (bool): when True, will use ceil instead of floor to compute the output shape.
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True.
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`.
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of:
`[batch_size, input_channels, input_height, input_width]`.
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns:
None.
Returns:None.
Raises: Raises:
ValueError: If `padding` is a string, but not "SAME" or "VALID". ValueError: 'pool_size' should be an integer or a list or tuple with length 1.
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0. Shape:
- x: 3-D tensor.
- out: 3-D tensor.
Examples: Examples:
.. code-block:: python .. code-block:: python
# average adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = sum(input[:, :, lstart: lend])/(lend - lstart)
#
import paddle import paddle
import paddle.nn as nn import paddle.nn as nn
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
# max pool3d data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32)) AdaptiveAvgPool1d = nn.AdaptiveAvgPool1d(output_size=16)
MaxPool3d = nn.MaxPool3d(kernel_size=2, pool_out = AdaptiveAvgPool1d(data)
stride=2, padding=0) # pool_out shape: [1, 3, 16]
output = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16]
# for return_indices=True
MaxPool3d = nn.MaxPool3d(kernel_size=2,stride=2, padding=0, return_indices=True)
output, max_indices = MaxPool3d(input)
# output.shape [1, 2, 3, 16, 16], max_indices.shape [1, 2, 3, 16, 16],
""" """
def __init__(self, def __init__(self, output_size, name=None):
kernel_size, super(AdaptiveAvgPool1d, self).__init__()
stride, self.output_size = output_size
padding,
return_indices=False,
ceil_mode=False,
data_format="NCDHW",
name=None):
super(MaxPool3d, self).__init__()
self.ksize = kernel_size
self.stride = stride
self.padding = padding
self.return_indices = return_indices
self.ceil_mode = ceil_mode
self.data_format = data_format
self.name = name self.name = name
def forward(self, x): def forward(self, input):
return F.max_pool3d( return F.adaptive_avg_pool1d(input, self.output_size, self.name)
x,
kernel_size=self.ksize,
stride=self.stride,
padding=self.padding,
return_indices=self.return_indices,
data_format=self.data_format,
name=self.name)
class AvgPool3d(layers.Layer): class AdaptiveAvgPool2d(layers.Layer):
""" """
This operation applies 3D max pooling over input features based on the input,
and kernel_size, stride, padding parameters. Input(X) and Output(Out) are
in NCDHW format, where N is batch size, C is the number of channels,
H is the height of the feature, D is the depth of the feature, and W is the width of the feature.
Args: This operation applies 2D adaptive avg pooling on input tensor. The h and w dimensions
kernel_size (int|list|tuple): The pool kernel size. If pool kernel size of the output tensor are determined by the parameter output_size.
is a tuple or list, it must contain three integers,
(pool_size_Depth, pool_size_Height, pool_size_Width). For avg adaptive pool2d:
Otherwise, the pool kernel size will be the cube of an int.
stride (string|int|list|tuple)): The pool padding. If `pool_padding` is a string, either 'VALID' or .. math::
'SAME' which is the padding algorithm. If pool stride size is a tuple or list,
it must contain three integers, `[stride_Depth, stride_Height, stride_Width]`. hstart &= floor(i * H_{in} / H_{out})
Otherwise, the pool stride size will be a cube of an int.
padding (int|list|tuple): The pool padding size. If pool padding size is a tuple or list, hend &= ceil((i + 1) * H_{in} / H_{out})
it could be in three forms: `[pad_depth, pad_height, pad_width]` or
`[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, wstart &= floor(j * W_{in} / W_{out})
and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form
`[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. wend &= ceil((j + 1) * W_{in} / W_{out})
when `data_format` is `"NDHWC"`, `pool_padding` can be in the form
`[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Output(i ,j) &= \\frac{sum(Input[hstart:hend, wstart:wend])}{(hend - hstart) * (wend - wstart)}
ceil_mode (bool): ${ceil_mode_comment}
count_include_pad (bool): Whether to exclude padding points in average pooling
mode, default is True. Parameters:
divisor_override (int|float) if specified, it will be used as divisor, otherwise kernel_size will be used. Default None. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NDHW"`. it must contain two elements, (H, W). H and W can be either an int, or None which means
The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: the size will be the same as that of the input.
`[batch_size, input_channels, input_height, input_width]`. data_format (str): The data format of the input and output data. An optional string
from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in
the order of: [batch_size, input_channels, input_height, input_width].
name(str, optional): For detailed information, please refer name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and to :ref:`api_guide_Name`. Usually name is no need to set and
None by default. None by default.
Returns: None. Shape:
Raises: x (Tensor): The input tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
ValueError: If `padding` is a string, but not "SAME" or "VALID". output (Tensor): The output tensor of adaptive avg pool2d operator, which is a 4-D tensor. The data type is same as input x.
ValueError: If `padding` is "VALID", but `ceil_mode` is True.
ShapeError: If the output's shape calculated is not greater than 0. Returns:
A callable object of AdaptiveAvgPool2d.
Examples: Examples:
.. code-block:: python .. code-block:: python
# adaptive avg pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = avg(input[:, :, hstart: hend, wstart: wend])
#
import paddle import paddle
import paddle.nn as nn
import numpy as np import numpy as np
paddle.disable_static() paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool2d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCHW", name=None):
super(AdaptiveAvgPool2d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool2d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
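# Illustrative sketch only: a NumPy restatement of the adaptive average pooling window
# arithmetic given in the AdaptiveAvgPool2d docstring above (hstart/hend, wstart/wend).
# The helper name is hypothetical and is not the kernel used by F.adaptive_avg_pool2d.
import math
import numpy as np

def _adaptive_avg_pool2d_ref(x, output_size):
    # x: numpy array of shape [N, C, H, W]; output_size: (H_out, W_out)
    n, c, h, w = x.shape
    h_out, w_out = output_size
    out = np.zeros((n, c, h_out, w_out), dtype=x.dtype)
    for i in range(h_out):
        hstart, hend = math.floor(i * h / h_out), math.ceil((i + 1) * h / h_out)
        for j in range(w_out):
            wstart, wend = math.floor(j * w / w_out), math.ceil((j + 1) * w / w_out)
            # average over the adaptively sized window
            out[:, :, i, j] = x[:, :, hstart:hend, wstart:wend].mean(axis=(2, 3))
    return out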
# avg pool3d
input = paddle.to_tensor(np.random.uniform(-1, 1, [1, 2, 3, 32, 32]).astype(np.float32))
AvgPool3d = nn.AvgPool3d(kernel_size=2,
stride=2, padding=0)
output = AvgPool3d(input)
# output.shape [1, 2, 3, 16, 16]
class AdaptiveAvgPool3d(layers.Layer):
""" """
def __init__(self, This operation applies 3D adaptive avg pooling on input tensor. The h and w dimensions
kernel_size, of the output tensor are determined by the parameter output_size.
stride,
padding=0, For avg adaptive pool3d:
ceil_mode=False,
count_include_pad=True, .. math::
divisor_override=None,
data_format="NCDHW", dstart &= floor(i * D_{in} / D_{out})
name=None):
super(AvgPool3d, self).__init__() dend &= ceil((i + 1) * D_{in} / D_{out})
self.ksize = kernel_size
self.stride = stride hstart &= floor(j * H_{in} / H_{out})
self.padding = padding
self.ceil_mode = ceil_mode hend &= ceil((j + 1) * H_{in} / H_{out})
self.count_include_pad = count_include_pad
self.divisor = divisor_override wstart &= floor(k * W_{in} / W_{out})
self.data_format = data_format
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= \\frac{sum(Input[dstart:dend, hstart:hend, wstart:wend])}{(dend - dstart) * (hend - hstart) * (wend - wstart)}
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
the size will be the same as that of the input.
data_format (str): The data format of the input and output data. An optional string
from: "NCDHW", "NDHWC". The default is "NCDHW". When it is "NCDHW", the data is stored in
the order of: [batch_size, input_channels, input_depth, input_height, input_width].
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive avg pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveAvgPool3d.
Examples:
.. code-block:: python
# adaptive avg pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive avg pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# avg(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
# x.shape is [2, 3, 8, 32, 32]
adaptive_avg_pool = paddle.nn.AdaptiveAvgPool3d(output_size=3)
pool_out = adaptive_avg_pool(x = x)
# pool_out.shape is [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, data_format="NCDHW", name=None):
super(AdaptiveAvgPool3d, self).__init__()
self._output_size = output_size
self._data_format = data_format
self._name = name
def forward(self, x):
return F.adaptive_avg_pool3d(
x,
output_size=self._output_size,
data_format=self._data_format,
name=self._name)
class AdaptiveMaxPool1d(layers.Layer):
"""
This operation applies a 1D adaptive max pooling over an input signal composed
of several input planes, based on the input, output_size, return_indices parameters.
Input(X) and output(Out) are in NCL format, where N is batch
size, C is the number of channels, L is the length of the feature.
The output tensor shape will be [N, C, output_size].
For max adaptive pool1d:
.. math::
lstart &= floor(i * L_{in} / L_{out})
lend &= ceil((i + 1) * L_{in} / L_{out})
Output(i) &= max(Input[lstart:lend])
Args:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain one int.
return_indices (bool): If true, the index of max pooling point will be returned along
with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Returns:
None.
Raises:
ValueError: 'pool_size' should be an integer or a list or tuple with length 1.
Shape:
x (Tensor): The input tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool1d operator, which is a 3-D tensor. The data type is same as input x.
Examples:
.. code-block:: python
# max adaptive pool1d
# suppose input data in shape of [N, C, L], `output_size` is m or [m],
# output shape is [N, C, m], adaptive pool divide L dimension
# of input data into m grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# lstart = floor(i * L / m)
# lend = ceil((i + 1) * L / m)
# output[:, :, i] = max(input[:, :, lstart: lend])
#
import paddle
import paddle.nn as nn
import numpy as np
paddle.disable_static()
data = paddle.to_tensor(np.random.uniform(-1, 1, [1, 3, 32]).astype(np.float32))
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16)
pool_out = AdaptiveMaxPool1d(data)
# pool_out shape: [1, 3, 16]
# for return_indices = true
AdaptiveMaxPool1d = nn.AdaptiveMaxPool1d(output_size=16, return_indices=True)
pool_out, indices = AdaptiveMaxPool1d(data)
# pool_out shape: [1, 3, 16], indices shape: [1, 3, 16]
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool1d, self).__init__()
self.output_size = output_size
self.return_indices = return_indices
self.name = name self.name = name
def forward(self, input):
return F.adaptive_max_pool1d(input, self.output_size,
self.return_indices, self.name)
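# Illustrative sketch only: the lstart/lend window computation from the AdaptiveMaxPool1d
# docstring above, written out in NumPy. The helper name is hypothetical and is not the
# implementation behind F.adaptive_max_pool1d.
import math
import numpy as np

def _adaptive_max_pool1d_ref(x, output_size):
    # x: numpy array of shape [N, C, L]
    n, c, l = x.shape
    out = np.zeros((n, c, output_size), dtype=x.dtype)
    for i in range(output_size):
        lstart = math.floor(i * l / output_size)
        lend = math.ceil((i + 1) * l / output_size)
        # max over the adaptively sized window
        out[:, :, i] = x[:, :, lstart:lend].max(axis=2)
    return out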
class AdaptiveMaxPool2d(layers.Layer):
"""
This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus on the output size.
For adaptive max pool2d:
.. math::
hstart &= floor(i * H_{in} / H_{out})
hend &= ceil((i + 1) * H_{in} / H_{out})
wstart &= floor(j * W_{in} / W_{out})
wend &= ceil((j + 1) * W_{in} / W_{out})
Output(i ,j) &= max(Input[hstart:hend, wstart:wend])
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either an int, or None which means the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveMaxPool2d.
Examples:
.. code-block:: python
# adaptive max pool2d
# suppose input data in shape of [N, C, H, W], `output_size` is [m, n],
# output shape is [N, C, m, n], adaptive pool divide H and W dimensions
# of input data into m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(m):
# for j in range(n):
# hstart = floor(i * H / m)
# hend = ceil((i + 1) * H / m)
# wstart = floor(i * W / n)
# wend = ceil((i + 1) * W / n)
# output[:, :, i, j] = max(input[:, :, hstart: hend, wstart: wend])
#
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 32, 32)
x = paddle.to_tensor(input_data)
adaptive_max_pool = paddle.nn.AdaptiveMaxPool2d(output_size=3, return_indices=True)
pool_out, indices = adaptive_max_pool(x = x)
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool2d, self).__init__()
self._output_size = output_size
self._return_indices = return_indices
self._name = name
def forward(self, x): def forward(self, x):
return F.avg_pool3d( return F.adaptive_max_pool2d(
x, x,
kernel_size=self.ksize, output_size=self._output_size,
stride=self.stride, return_indices=self._return_indices,
padding=self.padding, name=self._name)
ceil_mode=self.ceil_mode,
count_include_pad=self.count_include_pad,
divisor_override=self.divisor, class AdaptiveMaxPool3d(layers.Layer):
data_format=self.data_format, """
name=self.name) This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions
of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus on the output size.
For adaptive max pool3d:
.. math::
dstart &= floor(i * D_{in} / D_{out})
dend &= ceil((i + 1) * D_{in} / D_{out})
hstart &= floor(j * H_{in} / H_{out})
hend &= ceil((j + 1) * H_{in} / H_{out})
wstart &= floor(k * W_{in} / W_{out})
wend &= ceil((k + 1) * W_{in} / W_{out})
Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend])
Parameters:
output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list,
it must contain three elements, (D, H, W). D, H and W can be either an int, or None which means
the size will be the same as that of the input.
return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False.
name(str, optional): For detailed information, please refer
to :ref:`api_guide_Name`. Usually name is no need to set and
None by default.
Shape:
x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64.
output (Tensor): The output tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type is same as input x.
Returns:
A callable object of AdaptiveMaxPool3d.
Examples:
.. code-block:: python
# adaptive max pool3d
# suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n],
# output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions
# of input data into l * m * n grids averagely and performs poolings in each
# grid to get output.
# adaptive max pool performs calculations as follows:
#
# for i in range(l):
# for j in range(m):
# for k in range(n):
# dstart = floor(i * D / l)
# dend = ceil((i + 1) * D / l)
# hstart = floor(j * H / m)
# hend = ceil((j + 1) * H / m)
# wstart = floor(k * W / n)
# wend = ceil((k + 1) * W / n)
# output[:, :, i, j, k] =
# max(input[:, :, dstart:dend, hstart: hend, wstart: wend])
import paddle
import numpy as np
paddle.disable_static()
input_data = np.random.rand(2, 3, 8, 32, 32)
x = paddle.to_tensor(input_data)
pool = paddle.nn.AdaptiveMaxPool3d(output_size=4)
out = pool(x)
# out shape: [2, 3, 4, 4, 4]
pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True)
out, indices = pool(x)
# out shape: [2, 3, 3, 3, 3], indices shape: [2, 3, 3, 3, 3]
"""
def __init__(self, output_size, return_indices=False, name=None):
super(AdaptiveMaxPool3d, self).__init__()
self._output_size = output_size
self._return_indices = return_indices
self._name = name
def forward(self, x):
return F.adaptive_max_pool3d(
x,
output_size=self._output_size,
return_indices=self._return_indices,
name=self._name)
...@@ -26,9 +26,8 @@ __all__ = [ ...@@ -26,9 +26,8 @@ __all__ = [
] ]
from ..fluid.optimizer import SGD, Momentum, Adagrad, Dpsgd, DecayedAdagrad, \ from ..fluid.optimizer import Momentum, Adagrad, Dpsgd, DecayedAdagrad, Ftrl,\
Ftrl, Adadelta, \ AdagradOptimizer,DpsgdOptimizer,\
SGDOptimizer, MomentumOptimizer, AdagradOptimizer,DpsgdOptimizer,\
DecayedAdagradOptimizer,FtrlOptimizer,AdadeltaOptimizer, \ DecayedAdagradOptimizer,FtrlOptimizer,AdadeltaOptimizer, \
ModelAverage, LarsMomentum, DGCMomentumOptimizer, LambOptimizer,\ ModelAverage, LarsMomentum, DGCMomentumOptimizer, LambOptimizer,\
ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, \ ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, \
...@@ -39,6 +38,9 @@ from .adam import Adam ...@@ -39,6 +38,9 @@ from .adam import Adam
from .adamw import AdamW from .adamw import AdamW
from .adamax import Adamax from .adamax import Adamax
from .rmsprop import RMSProp from .rmsprop import RMSProp
from .adadelta import Adadelta
from .sgd import SGD
from .momentum import Momentum
from . import lr_scheduler from . import lr_scheduler
from .lr_scheduler import _LRScheduler, NoamLR, PiecewiseLR, NaturalExpLR, InverseTimeLR, PolynomialLR, \ from .lr_scheduler import _LRScheduler, NoamLR, PiecewiseLR, NaturalExpLR, InverseTimeLR, PolynomialLR, \
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
__all__ = ["Adadelta"]
class Adadelta(Optimizer):
"""
**Notes: This API does not support sparse parameter optimization.**
Adadelta Optimizer. Please refer to this for details:
`ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.
The update is done as follows:
.. math::
E(g_t^2) &= \\rho * E(g_{t-1}^2) + (1-\\rho) * g^2
learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \\epsilon ) / ( E(g_t^2) + \\epsilon ) }
E(dx_t^2) &= \\rho * E(dx_{t-1}^2) + (1-\\rho) * (-g*learning\_rate)^2
Args:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
epsilon (float): a small float number for numeric stability. Default 1.0e-6.
rho (float): a floating point value indicating the decay rate. Default 0.95.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase`` . There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
adadelta.step()
adadelta.clear_grad()
"""
_avg_squared_grad_acc_str = "_avg_squared_grad"
_avg_squared_update_acc_str = "_avg_squared_update"
def __init__(self,
learning_rate=0.001,
epsilon=1.0e-6,
rho=0.95,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set.")
if epsilon is None:
raise ValueError("epsilon is not set.")
if rho is None:
raise ValueError("rho is not set.")
super(Adadelta, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "adadelta"
self._epsilon = epsilon
self._rho = rho
def _create_accumulators(self, block, parameters):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
for p in parameters:
self._add_accumulator(self._avg_squared_grad_acc_str, p)
self._add_accumulator(self._avg_squared_update_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
if not isinstance(block, framework.Block):
raise TypeError("block is not instance of framework.Block.")
avg_squared_grad_acc = self._get_accumulator(
self._avg_squared_grad_acc_str, param_and_grad[0])
avg_squared_update_acc = self._get_accumulator(
self._avg_squared_update_acc_str, param_and_grad[0])
# Create the adadelta optimizer op
adadelta_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"AvgSquaredGrad": avg_squared_grad_acc,
"AvgSquaredUpdate": avg_squared_update_acc
},
outputs={
"ParamOut": param_and_grad[0],
"AvgSquaredGradOut": avg_squared_grad_acc,
"AvgSquaredUpdateOut": avg_squared_update_acc
},
attrs={"epsilon": self._epsilon,
"rho": self._rho},
stop_gradient=True)
return adadelta_op
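# Illustrative sketch only: the Adadelta update equations quoted in the docstring above,
# written with NumPy for a single dense parameter. The helper name is hypothetical; the
# real update is performed by the C++ "adadelta" operator appended above.
import numpy as np

def _adadelta_update_ref(param, grad, avg_sq_grad, avg_sq_update, rho=0.95, epsilon=1.0e-6):
    # E(g_t^2) = rho * E(g_{t-1}^2) + (1 - rho) * g^2
    avg_sq_grad = rho * avg_sq_grad + (1 - rho) * grad ** 2
    # learning_rate = sqrt((E(dx_{t-1}^2) + eps) / (E(g_t^2) + eps))
    step = np.sqrt((avg_sq_update + epsilon) / (avg_sq_grad + epsilon))
    update = -step * grad
    # E(dx_t^2) = rho * E(dx_{t-1}^2) + (1 - rho) * update^2
    avg_sq_update = rho * avg_sq_update + (1 - rho) * update ** 2
    return param + update, avg_sq_grad, avg_sq_update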
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
__all__ = ["Momentum"]
class Momentum(Optimizer):
"""
Simple Momentum optimizer with velocity state
This optimizer has a flag for Nesterov Momentum.
The update equations are as follows:
.. math::
& velocity = mu * velocity + gradient
& if (use\_nesterov):
&\quad param = param - (gradient + mu * velocity) * learning\_rate
& else:
&\quad param = param - learning\_rate * velocity
Parameters:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
momentum (float): Momentum factor. The default value is 0.9.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase`` . There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
momentum = paddle.optimizer.Momentum(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
momentum.step()
momentum.clear_grad()
"""
_velocity_acc_str = "velocity"
def __init__(self,
learning_rate=0.001,
momentum=0.9,
parameters=None,
use_nesterov=False,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set")
if momentum is None:
raise ValueError("momentum is not set")
super(Momentum, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "momentum"
self._momentum = momentum
self._use_nesterov = bool(use_nesterov)
def _create_accumulators(self, block, parameters):
assert isinstance(block, framework.Block)
for p in parameters:
self._add_accumulator(self._velocity_acc_str, p)
def _append_optimize_op(self, block, param_and_grad):
assert isinstance(block, framework.Block)
velocity_acc = self._get_accumulator(self._velocity_acc_str,
param_and_grad[0])
lr = self._create_param_lr(param_and_grad)
if framework.in_dygraph_mode():
_, _ = core.ops.momentum(param_and_grad[0], param_and_grad[1],
velocity_acc, lr, param_and_grad[0],
velocity_acc, 'mu', self._momentum,
'use_nesterov', self._use_nesterov)
return None
attrs = {"mu": self._momentum, "use_nesterov": self._use_nesterov}
inputs = {
"Param": [param_and_grad[0]],
"Grad": [param_and_grad[1]],
"Velocity": [velocity_acc],
"LearningRate": [lr]
}
outputs = {
"ParamOut": [param_and_grad[0]],
"VelocityOut": [velocity_acc]
}
# create the momentum optimize op
momentum_op = block.append_op(
type=self.type,
inputs=inputs,
outputs=outputs,
attrs=attrs,
stop_gradient=True)
return momentum_op
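# Illustrative sketch only: the velocity/parameter update from the Momentum docstring
# above, including the Nesterov branch. The helper name is hypothetical; the real update
# is performed by the "momentum" operator appended above.
def _momentum_update_ref(param, grad, velocity, learning_rate, mu=0.9, use_nesterov=False):
    # velocity = mu * velocity + gradient
    velocity = mu * velocity + grad
    if use_nesterov:
        param = param - (grad + mu * velocity) * learning_rate
    else:
        param = param - learning_rate * velocity
    return param, velocity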
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .optimizer import Optimizer
from ..fluid import core
from ..fluid import framework
from ..fluid.framework import Variable, name_scope
from ..fluid.dygraph import no_grad
__all__ = ["SGD"]
class SGD(Optimizer):
"""
Optimizer of the stochastic gradient descent algorithm.
.. math::
param\_out = param - learning\_rate * grad
Parameters:
learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \
This parameter is required in dygraph mode. \
The default value is None in static mode, at this time all parameters will be updated.
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
It can be a float value as the coefficient of L2 regularization or \
:ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
the regularization setting here in optimizer will be ignored for this parameter. \
Otherwise, the regularization setting here in optimizer will take effect. \
Default None, meaning there is no regularization.
grad_clip (GradientClipBase, optional): Gradient clipping strategy, it's an instance of
some derived class of ``GradientClipBase`` . There are three clipping strategies
( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
:ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
name (str, optional): The default value is None. Normally there is no need for user
to set this property. For more information, please refer to
:ref:`api_guide_Name` .
Examples:
.. code-block:: python
import paddle
import numpy as np
paddle.disable_static()
inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32")
linear = paddle.nn.Linear(10, 10)
inp = paddle.to_tensor(inp)
out = linear(inp)
loss = paddle.mean(out)
sgd = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
out.backward()
sgd.step()
sgd.clear_grad()
"""
def __init__(self,
learning_rate=0.001,
parameters=None,
weight_decay=None,
grad_clip=None,
name=None):
if learning_rate is None:
raise ValueError("learning_rate is not set")
super(SGD, self).__init__(
learning_rate=learning_rate,
parameters=parameters,
weight_decay=weight_decay,
grad_clip=grad_clip,
name=name)
self.type = "sgd"
@no_grad()
def _append_optimize_op(self, block, param_and_grad):
lr = self._create_param_lr(param_and_grad)
if framework.in_dygraph_mode():
core.ops.sgd(param_and_grad[0], lr, param_and_grad[1],
param_and_grad[0])
return None
assert isinstance(block, framework.Block)
# create the optimize op
sgd_op = block.append_op(
type=self.type,
inputs={
"Param": param_and_grad[0],
"Grad": param_and_grad[1],
"LearningRate": lr
},
outputs={"ParamOut": param_and_grad[0]},
stop_gradient=True)
return sgd_op
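# Illustrative sketch only: the one-line update rule quoted in the SGD docstring above
# (param_out = param - learning_rate * grad). Hypothetical helper, not the "sgd" operator.
def _sgd_update_ref(param, grad, learning_rate):
    return param - learning_rate * grad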
...@@ -1618,6 +1618,10 @@ def clip(x, min=None, max=None, name=None): ...@@ -1618,6 +1618,10 @@ def clip(x, min=None, max=None, name=None):
fmax = float(np.finfo(np_dtype).max) fmax = float(np.finfo(np_dtype).max)
if in_dygraph_mode(): if in_dygraph_mode():
if isinstance(min, Variable):
min = min.numpy().item(0)
if isinstance(max, Variable):
max = max.numpy().item(0)
min = fmin if min is None else min min = fmin if min is None else min
max = fmax if max is None else max max = fmax if max is None else max
return core.ops.clip(x, "min", min, "max", max) return core.ops.clip(x, "min", min, "max", max)
......
...@@ -94,7 +94,7 @@ def bernoulli(x, name=None): ...@@ -94,7 +94,7 @@ def bernoulli(x, name=None):
return out return out
def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None): def gaussian_random(shape, mean=0.0, std=1.0, dtype=None, name=None):
""" """
This OP returns a Tensor filled with random values sampled from a Gaussian This OP returns a Tensor filled with random values sampled from a Gaussian
distribution, with ``shape`` and ``dtype``. distribution, with ``shape`` and ``dtype``.
...@@ -109,9 +109,10 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None): ...@@ -109,9 +109,10 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None):
std(float|int, optional): Standard deviation of the output tensor, default std(float|int, optional): Standard deviation of the output tensor, default
is 1.0. is 1.0.
seed(int, optional): ${seed_comment} seed(int, optional): ${seed_comment}
dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of dtype(str|np.dtype, optional): The data type of the output Tensor.
the output Tensor. Supported data types: float32, float64. Supported data types: float32, float64.
Default is float32. Default is None, which uses the global default dtype (see ``get_default_dtype``
for details).
name(str, optional): The default value is None. Normally there is no name(str, optional): The default value is None. Normally there is no
need for user to set this property. For more information, please need for user to set this property. For more information, please
refer to :ref:`api_guide_Name`. refer to :ref:`api_guide_Name`.
...@@ -120,6 +121,13 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None): ...@@ -120,6 +121,13 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype='float32', name=None):
Tensor: A Tensor filled with random values sampled from a Gaussian Tensor: A Tensor filled with random values sampled from a Gaussian
distribution, with ``shape`` and ``dtype``. distribution, with ``shape`` and ``dtype``.
""" """
if dtype is None:
dtype = paddle.framework.get_default_dtype()
if dtype not in ['float32', 'float64']:
raise TypeError(
"gaussian_random only supports [float32, float64], but the default dtype is %s"
% dtype)
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
seed = 0 seed = 0
...@@ -169,9 +177,10 @@ def standard_normal(shape, dtype=None, name=None):
             (with the shape [1], and the data type int32 or int64). If ``shape``
             is a Tensor, it should be a 1-D Tensor(with the data type int32 or
             int64).
-        dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
-            output tensor. Supported data types: float32, float64. If ``dytpe``
-            is None, the data type is float32. Default is None.
+        dtype(str|np.dtype, optional): The data type of the output Tensor.
+            Supported data types: float32, float64.
+            Default is None, use global default dtype (see ``get_default_dtype``
+            for details).
         name (str, optional): Name for the operation (optional, default is None).
             For more information, please refer to :ref:`api_guide_Name`.
...@@ -216,7 +225,11 @@ def standard_normal(shape, dtype=None, name=None):
     """
     if dtype is None:
-        dtype = 'float32'
+        dtype = paddle.framework.get_default_dtype()
+        if dtype not in ['float32', 'float64']:
+            raise TypeError(
+                "standard_normal only supports [float32, float64], but the default dtype is %s"
+                % dtype)
     return gaussian_random(
         shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name)
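Note: with the same change, `standard_normal` (a thin wrapper over `gaussian_random` with mean 0.0 and std 1.0) now follows the global default dtype. A hedged usage sketch; the import path and `set_default_dtype` being the setter paired with the getter used above are assumptions of this example:

import paddle
from paddle.tensor.random import standard_normal   # module path assumed

paddle.framework.set_default_dtype('float64')
x = standard_normal(shape=[2, 3])                   # dtype=None now resolves to float64
paddle.framework.set_default_dtype('float32')       # restore the usual default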
...@@ -325,7 +338,7 @@ def normal(mean=0.0, std=1.0, shape=None, name=None):
     return out


-def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
+def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None):
     """
     This OP returns a Tensor filled with random values sampled from a uniform
     distribution in the range [``min``, ``max``), with ``shape`` and ``dtype``.
...@@ -343,9 +356,10 @@ def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
             (with the shape [1], and the data type int32 or int64). If ``shape``
             is a Tensor, it should be a 1-D Tensor(with the data type int32 or
             int64).
-        dtype(str|np.dtype, optional): The data type of
-            the output Tensor. Supported data types: float32, float64.
-            Default is float32.
+        dtype(str|np.dtype, optional): The data type of the output Tensor.
+            Supported data types: float32, float64.
+            Default is None, use global default dtype (see ``get_default_dtype``
+            for details).
         min(float|int, optional): The lower bound on the range of random values
             to generate, ``min`` is included in the range. Default is -1.0.
         max(float|int, optional): The upper bound on the range of random values
...@@ -401,6 +415,13 @@ def uniform(shape, dtype='float32', min=-1.0, max=1.0, seed=0, name=None):
     """
+    if dtype is None:
+        dtype = paddle.framework.get_default_dtype()
+        if dtype not in ['float32', 'float64']:
+            raise TypeError(
+                "uniform only supports [float32, float64], but the default dtype is %s"
+                % dtype)
     if not isinstance(dtype, core.VarDesc.VarType):
         dtype = convert_np_dtype_to_dtype_(dtype)
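Note: `uniform` gets the same treatment. A rough NumPy analogue of the resulting behaviour when no dtype is passed (for intuition only; the names and the fallback argument are illustrative, not the Paddle kernel):

import numpy as np

def uniform_like(shape, min=-1.0, max=1.0, dtype=None, default_dtype='float32'):
    dtype = dtype or default_dtype      # stands in for get_default_dtype()
    return np.random.uniform(low=min, high=max, size=shape).astype(dtype)

print(uniform_like([2, 3]).dtype)       # float32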
...@@ -447,7 +468,7 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None):
             (with the shape [1], and the data type int32 or int64). If ``shape``
             is a Tensor, it should be a 1-D Tensor(with the data type int32 or
             int64). Default is [1].
-        dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
+        dtype(str|np.dtype, optional): The data type of the
             output tensor. Supported data types: int32, int64. If ``dytpe``
             is None, the data type is int64. Default is None.
         name(str, optional): The default value is None. Normally there is no
...@@ -550,7 +571,7 @@ def randperm(n, dtype="int64", name=None):
     Args:
         n(int): The upper bound (exclusive), and it should be greater than 0.
-        dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of
+        dtype(str|np.dtype, optional): The data type of
             the output Tensor. Supported data types: int32, int64, float32,
             float64. Default is int64.
         name(str, optional): The default value is None. Normally there is no
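Note: for `randint` and `randperm` only the documented `dtype` types change (the `core.VarDesc.VarType` enum is dropped from the docs); strings and `np.dtype` objects remain accepted. A hedged usage sketch (the import path is an assumption of this example):

import numpy as np
from paddle.tensor.random import randint, randperm

ints = randint(low=0, high=10, shape=[4], dtype='int64')   # dtype as a string
perm = randperm(6, dtype=np.int32)                         # dtype as np.dtype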
...@@ -622,9 +643,10 @@ def rand(shape, dtype=None, name=None):
             (with the shape [1], and the data type int32 or int64). If ``shape``
             is a Tensor, it should be a 1-D Tensor(with the data type int32 or
             int64).
-        dtype(str|np.dtype|core.VarDesc.VarType, optional): The data type of the
-            output tensor. Supported data types: float32, float64. If ``dytpe``
-            is None, the data type is float32. Default is None.
+        dtype(str|np.dtype, optional): The data type of the output Tensor.
+            Supported data types: float32, float64.
+            Default is None, use global default dtype (see ``get_default_dtype``
+            for details).
         name(str, optional): The default value is None. Normally there is no
             need for user to set this property. For more information, please
             refer to :ref:`api_guide_Name`.
...@@ -668,7 +690,11 @@ def rand(shape, dtype=None, name=None):
     """
     if dtype is None:
-        dtype = 'float32'
+        dtype = paddle.framework.get_default_dtype()
+        if dtype not in ['float32', 'float64']:
+            raise TypeError(
+                "rand only supports [float32, float64], but the default dtype is %s"
+                % dtype)
     out = uniform(shape, dtype, min=0.0, max=1.0, name=name)
     out.stop_gradient = True
...
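Note: like the other samplers touched here, `rand` forwards to `uniform` on [0, 1) and marks its output with `stop_gradient = True`, i.e. the sample is treated as data rather than a trainable value. A minimal check, with the import path again assumed:

from paddle.tensor.random import rand

x = rand(shape=[2, 2])        # uniform on [0, 1), dtype taken from the global default
print(x.stop_gradient)        # True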
...@@ -55,7 +55,7 @@ def get_os_info():
     else:
         plat = None
         ver = None
-    envs['os_info'] = "{} {}".format(plat, ver)
+    envs['os_info'] = "{0} {1}".format(plat, ver)


 def get_python_info():
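Note: throughout this tool the format strings switch from auto-numbered fields ("{} {}") to explicit positional indices ("{0} {1}"), likely for compatibility with older interpreters: auto-numbering requires Python 2.7/3.1+, while explicit indices also work before that and render identically. The values below are illustrative:

print("{0} {1}".format("CentOS", "7.6"))    # CentOS 7.6  (same output as "{} {}")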
...@@ -93,7 +93,7 @@ def get_cudnn_info():
         if cudnn_dll_path:
             cudnn_header_path = cudnn_dll_path.split('bin')[
                 0] + 'include\cudnn.h'
-            cmd = 'type "{}" | findstr "{}" | findstr /v "CUDNN_VERSION"'
+            cmd = 'type "{0}" | findstr "{1}" | findstr /v "CUDNN_VERSION"'
         else:
             envs['cudnn_version'] = None
             return
...@@ -102,7 +102,7 @@ def get_cudnn_info():
             'whereis "cudnn.h" | awk \'{print $2}\'')
         if cudnn_header_path:
             cudnn_header_path = cudnn_header_path.strip()
-            cmd = 'cat "{}" | grep "{}" | grep -v "CUDNN_VERSION"'
+            cmd = 'cat "{0}" | grep "{1}" | grep -v "CUDNN_VERSION"'
         else:
             envs['cudnn_version'] = None
             return
...@@ -112,7 +112,7 @@ def get_cudnn_info():
     patch_level = _get_cudnn_ver(
         cmd.format(cudnn_header_path, 'CUDNN_PATCHLEVEL'))
-    envs['cudnn_version'] = "{}.{}.{}".format(major, minor, patch_level)
+    envs['cudnn_version'] = "{0}.{1}.{2}".format(major, minor, patch_level)


 def get_driver_info():
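Note: on both platforms `cmd` greps the CUDNN_MAJOR / CUDNN_MINOR / CUDNN_PATCHLEVEL defines out of cudnn.h and the pieces are joined as "{0}.{1}.{2}". A plain-Python stand-in for what that pipeline extracts (the parser below is illustrative, not the script's `_get_cudnn_ver`):

import re

def parse_cudnn_version(header_text):
    vals = []
    for key in ('CUDNN_MAJOR', 'CUDNN_MINOR', 'CUDNN_PATCHLEVEL'):
        m = re.search(r'#define\s+%s\s+(\d+)' % key, header_text)
        vals.append(m.group(1) if m else '?')
    return "{0}.{1}.{2}".format(*vals)

header = "#define CUDNN_MAJOR 7\n#define CUDNN_MINOR 6\n#define CUDNN_PATCHLEVEL 5\n"
print(parse_cudnn_version(header))    # 7.6.5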
...@@ -132,7 +132,7 @@ def main():
     get_cuda_info()
     get_cudnn_info()
     get_driver_info()
-    print(envs_template.format(**envs))
+    print('*' * 40 + envs_template.format(**envs) + '*' * 40)


 if __name__ == '__main__':
...