diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 9d5c0cc7048f7db539c090d28c6184ac6d72d75a..bb5e2e1369a8478b500572106f9d11dff12e0189 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -272,7 +272,7 @@ cc_test(op_compatible_info_test SRCS op_compatible_info_test.cc DEPS op_compatib cc_library(save_load_util SRCS save_load_util DEPS tensor scope layer) cc_test(save_load_util_test SRCS save_load_util_test.cc DEPS save_load_util tensor scope layer) -cc_library(generator SRCS generator.cc) +cc_library(generator SRCS generator.cc DEPS enforce place) # Get the current working branch execute_process( diff --git a/paddle/fluid/framework/data_layout_transform.cc b/paddle/fluid/framework/data_layout_transform.cc index 3cea7a66d01051824a1de01d62c237636771804b..f757e244e38ec965d62d673e63ed082ca70c63c7 100644 --- a/paddle/fluid/framework/data_layout_transform.cc +++ b/paddle/fluid/framework/data_layout_transform.cc @@ -116,6 +116,8 @@ void* GetDataFromTensor(const Tensor& tensor, mkldnn::memory::data_type type) { return platform::to_void_cast(tensor.data()); case mkldnn::memory::data_type::s32: return platform::to_void_cast(tensor.data()); + case mkldnn::memory::data_type::bf16: + return platform::to_void_cast(tensor.data()); default: PADDLE_THROW( platform::errors::InvalidArgument("Wrong mkldnn type provided.")); diff --git a/paddle/fluid/framework/data_layout_transform.h b/paddle/fluid/framework/data_layout_transform.h index 6eb84ef9d7c01b589cc95a78ea9727a81f6dc36e..b92c47c2eb018603e1b3156921fb2c1702864c57 100644 --- a/paddle/fluid/framework/data_layout_transform.h +++ b/paddle/fluid/framework/data_layout_transform.h @@ -61,7 +61,8 @@ inline MKLDNNDataType ToMKLDNNDataType(proto::VarType::Type type) { {DataTypeTrait::DataType(), MKLDNNDataType::f32}, {DataTypeTrait::DataType(), MKLDNNDataType::s8}, {DataTypeTrait::DataType(), MKLDNNDataType::u8}, - {DataTypeTrait::DataType(), MKLDNNDataType::s32}}; + {DataTypeTrait::DataType(), MKLDNNDataType::s32}, + {DataTypeTrait::DataType(), MKLDNNDataType::bf16}}; auto iter = dict.find(static_cast(type)); if (iter != dict.end()) return iter->second; return MKLDNNDataType::undef; @@ -74,6 +75,9 @@ void innerTransDataLayoutFromMKLDNN(DataLayout in_layout, DataLayout out_layout, void TransDataLayoutFromMKLDNN(const OpKernelType& kernel_type_for_var, const OpKernelType& expected_kernel_type, const Tensor& in, Tensor* out); + +void* GetDataFromTensor(const Tensor& tensor, MKLDNNDataType type); + #endif std::vector GetAxis(const DataLayout& from, const DataLayout& to); diff --git a/paddle/fluid/framework/data_layout_transform_test.cc b/paddle/fluid/framework/data_layout_transform_test.cc index a0d08826b854fea9256382f0e065fd59dda8c8b3..8dfad23db65178c46140b887811846e413bebd00 100644 --- a/paddle/fluid/framework/data_layout_transform_test.cc +++ b/paddle/fluid/framework/data_layout_transform_test.cc @@ -43,3 +43,17 @@ TEST(DataTransform, DataLayoutFunction) { EXPECT_TRUE(in.layout() == paddle::framework::DataLayout::kNHWC); EXPECT_TRUE(in.dims() == paddle::framework::make_ddim({2, 3, 1, 2})); } + +#ifdef PADDLE_WITH_MKLDNN +TEST(DataTransform, GetDataFromTensorDNNL) { + auto place = paddle::platform::CPUPlace(); + paddle::framework::Tensor in = paddle::framework::Tensor(); + in.mutable_data( + paddle::framework::make_ddim({2, 3, 1, 2}), place); + + void* in_data = + paddle::framework::GetDataFromTensor(in, dnnl::memory::data_type::bf16); + EXPECT_EQ(in_data, 
paddle::platform::to_void_cast( + in.data())); +} +#endif diff --git a/paddle/fluid/framework/data_set.cc b/paddle/fluid/framework/data_set.cc index df58193f95e2fc2f1ff7e4b7af76dd1f7c9837ef..94934629e28726d15348c5c692eaf31f7598110c 100644 --- a/paddle/fluid/framework/data_set.cc +++ b/paddle/fluid/framework/data_set.cc @@ -95,9 +95,10 @@ void DatasetImpl::SetHdfsConfig(const std::string& fs_name, const std::string& fs_ugi) { fs_name_ = fs_name; fs_ugi_ = fs_ugi; - std::string cmd = std::string("hadoop fs"); + std::string cmd = std::string("$HADOOP_HOME/bin/hadoop fs"); cmd += " -D fs.default.name=" + fs_name; cmd += " -D hadoop.job.ugi=" + fs_ugi; + cmd += " -Ddfs.client.block.write.retries=15 -Ddfs.rpc.timeout=500000"; paddle::framework::hdfs_set_command(cmd); } diff --git a/paddle/fluid/framework/data_type.cc b/paddle/fluid/framework/data_type.cc index f479d92483c1c39a0b43e0d8c514237bf89bcc00..8188d5cde1b90436d040e8b9dcc1070ac85bf319 100644 --- a/paddle/fluid/framework/data_type.cc +++ b/paddle/fluid/framework/data_type.cc @@ -18,6 +18,7 @@ #include using float16 = paddle::platform::float16; +using bfloat16 = paddle::platform::bfloat16; namespace paddle { namespace framework { diff --git a/paddle/fluid/framework/data_type.h b/paddle/fluid/framework/data_type.h index 2c4a7b4d02727437742b19cc6d51e209e4346d03..720e422e114835f367317d4ba265254856885c15 100644 --- a/paddle/fluid/framework/data_type.h +++ b/paddle/fluid/framework/data_type.h @@ -17,6 +17,8 @@ limitations under the License. */ #include #include "paddle/fluid/framework/framework.pb.h" #include "paddle/fluid/platform/enforce.h" + +#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/float16.h" namespace paddle { @@ -36,15 +38,16 @@ struct DataTypeTrait { #define _ForEachDataTypeHelper_(callback, cpp_type, proto_type) \ callback(cpp_type, ::paddle::framework::proto::VarType::proto_type); -#define _ForEachDataType_(callback) \ - _ForEachDataTypeHelper_(callback, float, FP32); \ - _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16); \ - _ForEachDataTypeHelper_(callback, double, FP64); \ - _ForEachDataTypeHelper_(callback, int, INT32); \ - _ForEachDataTypeHelper_(callback, int64_t, INT64); \ - _ForEachDataTypeHelper_(callback, bool, BOOL); \ - _ForEachDataTypeHelper_(callback, uint8_t, UINT8); \ - _ForEachDataTypeHelper_(callback, int16_t, INT16); \ +#define _ForEachDataType_(callback) \ + _ForEachDataTypeHelper_(callback, float, FP32); \ + _ForEachDataTypeHelper_(callback, ::paddle::platform::float16, FP16); \ + _ForEachDataTypeHelper_(callback, ::paddle::platform::bfloat16, BF16); \ + _ForEachDataTypeHelper_(callback, double, FP64); \ + _ForEachDataTypeHelper_(callback, int, INT32); \ + _ForEachDataTypeHelper_(callback, int64_t, INT64); \ + _ForEachDataTypeHelper_(callback, bool, BOOL); \ + _ForEachDataTypeHelper_(callback, uint8_t, UINT8); \ + _ForEachDataTypeHelper_(callback, int16_t, INT16); \ _ForEachDataTypeHelper_(callback, int8_t, INT8) #define _ForEachDataTypeSmall_(callback) \ diff --git a/paddle/fluid/framework/data_type_test.cc b/paddle/fluid/framework/data_type_test.cc index 2a380201f297f42dd82a6809bef9a72660066819..331596da33acc151810cd616ea6d5bdcae333b30 100644 --- a/paddle/fluid/framework/data_type_test.cc +++ b/paddle/fluid/framework/data_type_test.cc @@ -38,3 +38,25 @@ TEST(DataType, float16) { std::string type = "::paddle::platform::float16"; EXPECT_STREQ(f::DataTypeToString(dtype).c_str(), type.c_str()); } + +TEST(DataType, bfloat16) { + using paddle::framework::Tensor; + 
using paddle::platform::CPUPlace; + using paddle::platform::bfloat16; + namespace f = paddle::framework; + f::proto::VarType::Type dtype = f::proto::VarType::BF16; + + Tensor tensor; + CPUPlace cpu; + tensor.mutable_data(cpu, dtype); + + // test bf16 tensor + EXPECT_EQ(tensor.type(), f::ToDataType(typeid(bfloat16))); + + // test bf16 size + EXPECT_EQ(f::SizeOfType(dtype), 2u); + + // test debug info + std::string type = "::paddle::platform::bfloat16"; + EXPECT_STREQ(f::DataTypeToString(dtype).c_str(), type.c_str()); +} diff --git a/paddle/fluid/framework/data_type_transform.cc b/paddle/fluid/framework/data_type_transform.cc index 44542f05d9d5c92f58a84dc2be59782bae2ff3aa..3d56152c237695126d2eecb0c51ebd964a85a690 100644 --- a/paddle/fluid/framework/data_type_transform.cc +++ b/paddle/fluid/framework/data_type_transform.cc @@ -77,6 +77,10 @@ void TransDataType(const OpKernelType& kernel_type_for_var, framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; + case proto::VarType::BF16: + framework::VisitDataType(dst_type, + CastDataType(in, out, ctx)); + break; case proto::VarType::FP32: framework::VisitDataType(dst_type, CastDataType(in, out, ctx)); break; diff --git a/paddle/fluid/framework/data_type_transform_test.cc b/paddle/fluid/framework/data_type_transform_test.cc index bbebea9f13fd37469a0e9b7be9719aca128f5687..ea7a665bcbe02ff382f1b3bf04ce177a674483c9 100644 --- a/paddle/fluid/framework/data_type_transform_test.cc +++ b/paddle/fluid/framework/data_type_transform_test.cc @@ -24,6 +24,11 @@ TEST(DataTypeTransform, CPUTransform) { paddle::framework::DataLayout::kAnyLayout, paddle::framework::LibraryType::kPlain); + auto kernel_bf16 = paddle::framework::OpKernelType( + paddle::framework::proto::VarType::BF16, place, + paddle::framework::DataLayout::kAnyLayout, + paddle::framework::LibraryType::kPlain); + auto kernel_fp32 = paddle::framework::OpKernelType( paddle::framework::proto::VarType::FP32, place, paddle::framework::DataLayout::kAnyLayout, @@ -189,4 +194,120 @@ TEST(DataTypeTransform, CPUTransform) { static_cast(in_data_bool[i]).x); } } + + // data type transform from/to bfloat16 + { + paddle::framework::Tensor in; + paddle::framework::Tensor out; + + paddle::platform::bfloat16* ptr = + in.mutable_data( + paddle::framework::make_ddim({2, 3}), place); + int data_number = 2 * 3; + + for (int i = 0; i < data_number; ++i) { + ptr[i] = i; + } + + // transform from bfloat16 to other data types + paddle::framework::TransDataType(kernel_bf16, kernel_fp32, in, &out); + float* out_data_float = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_float[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_bf16, kernel_fp64, in, &out); + double* out_data_double = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_double[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_bf16, kernel_int32, in, &out); + int* out_data_int = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_int[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_bf16, kernel_int64, in, &out); + int64_t* out_data_int64 = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_int64[i], static_cast(ptr[i])); + } + + paddle::framework::TransDataType(kernel_bf16, kernel_bool, in, &out); + bool* out_data_bool = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(out_data_bool[i], static_cast(ptr[i])); + } + + // transform float to bfloat16 + float* 
in_data_float = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_float[i] = i; + } + + paddle::framework::TransDataType(kernel_fp32, kernel_bf16, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i].x, + static_cast(in_data_float[i]).x); + } + + // transform double to bfloat16 + double* in_data_double = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_double[i] = i; + } + + paddle::framework::TransDataType(kernel_fp64, kernel_bf16, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i].x, + static_cast(in_data_double[i]).x); + } + + // transform int to bfloat16 + int* in_data_int = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_int[i] = i; + } + + paddle::framework::TransDataType(kernel_int32, kernel_bf16, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i].x, + static_cast(in_data_int[i]).x); + } + + // transform int64 to bfloat16 + int64_t* in_data_int64 = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_int64[i] = i; + } + + paddle::framework::TransDataType(kernel_int64, kernel_bf16, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i].x, + static_cast(in_data_int64[i]).x); + } + + // transform bool to bfloat16 + bool* in_data_bool = + in.mutable_data(paddle::framework::make_ddim({2, 3}), place); + for (int i = 0; i < data_number; ++i) { + in_data_bool[i] = i; + } + + paddle::framework::TransDataType(kernel_bool, kernel_bf16, in, &out); + ptr = out.data(); + for (int i = 0; i < data_number; ++i) { + EXPECT_EQ(ptr[i].x, + static_cast(in_data_bool[i]).x); + } + } } diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt index 4d8bd101258664f6cafd71784ae070e0cb8b9215..a3cc4d1721e20a72817606bd773129230a8154ce 100644 --- a/paddle/fluid/framework/details/CMakeLists.txt +++ b/paddle/fluid/framework/details/CMakeLists.txt @@ -3,6 +3,7 @@ cc_library(op_handle_base SRCS op_handle_base.cc DEPS var_handle device_context cc_library(scale_loss_grad_op_handle SRCS scale_loss_grad_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) +cc_library(fetch_async_op_handle SRCS fetch_async_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory) cc_library(share_tensor_buffer_functor SRCS share_tensor_buffer_functor.cc DEPS framework_proto scope place operator op_registry) cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry) @@ -98,7 +99,7 @@ cc_library(scope_buffered_ssa_graph_executor SRCS scope_buffered_ssa_graph_execu #cc_test(reduce_op_handle_test SRCS reduce_op_handle_test.cc DEPS var_handle op_handle_base scope ddim memory # device_context reduce_op_handle ) cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executor.cc - DEPS fetch_op_handle ssa_graph_executor scope simple_threadpool device_context) + DEPS fetch_async_op_handle ssa_graph_executor scope simple_threadpool device_context) cc_test(fused_broadcast_op_test SRCS fused_broadcast_op_handle_test.cc DEPS fused_broadcast_op_handle) cc_test(exception_holder_test SRCS 
exception_holder_test.cc ) diff --git a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc index f5ec78f44b5ebb780cc569c24ccdca6336195961..e440dff2af6b5649d34f47c3b696edeb8a1ba0a2 100644 --- a/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.cc @@ -18,7 +18,8 @@ #include #include #include -#include "paddle/fluid/framework/details/fetch_op_handle.h" +#include "paddle/fluid/framework/details/computation_op_handle.h" +#include "paddle/fluid/framework/details/fetch_async_op_handle.h" #include "paddle/fluid/framework/details/multi_devices_helper.h" #include "paddle/fluid/framework/ir/graph_helper.h" #include "paddle/fluid/platform/profiler.h" @@ -120,6 +121,11 @@ FetchResultType FastThreadedSSAGraphExecutor::Run( } // Wait FetchOps. ClearFetchOp(graph_, &fetch_ops); + + for (auto &place : places_) { + fetch_ctxs_.Get(place)->Wait(); + } + return fetches; } @@ -162,8 +168,8 @@ void FastThreadedSSAGraphExecutor::InsertFetchOps( ir::Node *fetch_node = graph_->CreateEmptyNode("fetch", ir::Node::Type::kOperation); - auto *op = new FetchOpHandle(fetch_node, fetches, i, &local_scopes_, - &local_exec_scopes_, return_merged); + auto *op = new FetchAsyncOpHandle(fetch_node, fetches, i, &local_scopes_, + &local_exec_scopes_, return_merged); fetch_ops->emplace_back(op); for (auto &p : places_) { @@ -174,6 +180,14 @@ void FastThreadedSSAGraphExecutor::InsertFetchOps( op->AddInput(var); } + for (auto *var : vars) { + auto *op = var->GeneratedOp(); + auto *compute_op = dynamic_cast(op); + if (compute_op) { + compute_op->SetLockAndRecordEventFree(false); + } + } + int dep = static_cast(op->NotReadyInputSize()); (*op_deps)[op] = dep; if (dep == 0) { @@ -261,7 +275,7 @@ void FastThreadedSSAGraphExecutor::PrepareAtomicOpDeps() { const ir::Graph &FastThreadedSSAGraphExecutor::Graph() const { return *graph_; } void FastThreadedSSAGraphExecutor::RecordOps(OpHandleBase *op) { - if (strategy_.num_threads_ == 1 && !dynamic_cast(op)) { + if (strategy_.num_threads_ == 1 && !dynamic_cast(op)) { traced_ops_.emplace_back(op); } } diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.cc b/paddle/fluid/framework/details/fetch_async_op_handle.cc new file mode 100644 index 0000000000000000000000000000000000000000..6aae523365ed50e78a78b318ac0990490c801eb3 --- /dev/null +++ b/paddle/fluid/framework/details/fetch_async_op_handle.cc @@ -0,0 +1,275 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/framework/details/fetch_async_op_handle.h" +#include +#include +#include +#include "paddle/fluid/platform/profiler.h" + +namespace paddle { +namespace framework { +namespace details { + +FetchAsyncOpHandle::FetchAsyncOpHandle(ir::Node *node, FetchResultType *data, + size_t offset, + std::vector *local_scopes, + std::vector *local_exec_scopes, + bool return_merged) + : OpHandleBase(node), + data_(data), + offset_(offset), + local_scopes_(local_scopes), + local_exec_scopes_(local_exec_scopes), + return_merged_(return_merged) {} + +FetchAsyncOpHandle::~FetchAsyncOpHandle() {} + +void FetchAsyncOpHandle::RecordWaitEventOnCtx( + platform::DeviceContext *waited_ctx) { + PADDLE_THROW(platform::errors::PermissionDenied( + "No nodes need to wait FetchAsyncOp. Unexpceted Error.")); +} + +static void CheckTensorAttrs(const LoDTensor *tensor, + const proto::VarType::Type &type, + const DataLayout &layout, const DDim &dims, + const LoD &lod, const size_t offset) { + if (tensor->numel() && tensor->IsInitialized()) { + // step1: check type + PADDLE_ENFORCE_EQ( + type, tensor->type(), + platform::errors::InvalidArgument( + "The data type of fetched Tensors or the items of fetched " + "LoDTensorArray are different from each other on different " + "devices(%s vs %s). And the error is caused by the %zu " + "(th) fetched variable. Please set the " + "parameter `return_merged = False` when you " + "call the `Executor.run()` method.", + DataTypeToString(type), DataTypeToString(tensor->type()), offset)); + + // step2: check layout + PADDLE_ENFORCE_EQ( + layout, tensor->layout(), + platform::errors::InvalidArgument( + "The layout of fetched Tensors or the items of fetched " + "LoDTensorArray are different from each other on different " + "devices(%s vs %s). And the error is caused by the %zu " + "(th) fetched variable. Please set the " + "parameter `return_merged = False` when you " + "call the `Executor.run()` method.", + DataLayoutToString(layout), DataLayoutToString(tensor->layout()), + offset)); + } + + // step3: check dims + auto tensor_dims = tensor->dims(); + PADDLE_ENFORCE_EQ(dims.size(), tensor_dims.size(), + platform::errors::InvalidArgument( + "The dimension sizes of fetched Tensors or " + "the items of fetched LoDTensorArray are " + "different from each other on different " + "devices(%s vs %s). And the error is caused by the %zu " + "(th) fetched variable. Please set the " + "parameter `return_merged = False` when you " + "call the `Executor.run()` method.", + dims, tensor_dims, offset)); + for (int j = 1; j < dims.size(); j++) { + PADDLE_ENFORCE_EQ(dims[j], tensor_dims[j], + platform::errors::InvalidArgument( + "The dimensions of fetched Tensors or " + "the items of fetched LoDTensorArray are " + "different from each other on different " + "devices(%s vs %s). And the error is caused by the " + "%zu (th) fetched variable. Please set the " + "parameter `return_merged = False` when " + "you call the `Executor.run()` method.", + dims, tensor_dims, offset)); + } + + // step4: check lod + PADDLE_ENFORCE_EQ( + lod.size(), tensor->lod().size(), + platform::errors::InvalidArgument( + "The LoD information of fetched Tensors or the items of fetched " + "LoDTensorArray are different from each other on different " + "devices(%s vs %s). And the error is caused by the %zu " + "(th) fetched variable. 
Please set the " + "parameter `return_merged = False` when you " + "call the `Executor.run()` method.", + lod, tensor->lod(), offset)); +} + +static void TransData(const framework::Tensor *src_item, + framework::Tensor *dst_item, + const platform::DeviceContext &ctx) { + if (src_item->IsInitialized() && src_item->numel() > 0) { + if (platform::is_gpu_place(src_item->place())) { +#ifdef PADDLE_WITH_CUDA + TensorCopy(*src_item, platform::CUDAPinnedPlace(), ctx, dst_item); +#endif + } else { + TensorCopy(*src_item, platform::CPUPlace(), dst_item); + } + } +} + +void FetchAsyncOpHandle::FetchMergedLodTensor( + const std::vector &src_lodtensors, + LoDTensor *dst_lodtensor) { + // calc dst type,layout,dim,lod and calc check dim + proto::VarType::Type new_type = proto::VarType::FP32; + framework::DataLayout new_layout; + framework::DDim new_dim; + LoD new_lod = src_lodtensors[0]->lod(); + + framework::DDim check_dim; + + for (auto *t : src_lodtensors) { + if (t->numel() && t->IsInitialized()) { + check_dim = t->dims(); + new_type = t->type(); + new_layout = t->layout(); + break; + } + } + + bool find_first_dims = false; + for (auto *t : src_lodtensors) { + if (t->numel() && t->IsInitialized()) { + if (!find_first_dims) { + new_dim = t->dims(); + find_first_dims = true; + } else { + new_dim[0] += t->dims()[0]; + } + } + } + + // check src type,layout,dim,lod consistence + for (size_t i = 1; i < src_lodtensors.size(); ++i) { + CheckTensorAttrs(src_lodtensors[i], new_type, new_layout, check_dim, + new_lod, offset_); + } + + // set dst tensor + dst_lodtensor->Resize(new_dim); + dst_lodtensor->set_layout(src_lodtensors[0]->layout()); + dst_lodtensor->set_lod(src_lodtensors[0]->lod()); + if (platform::is_gpu_place(src_lodtensors[0]->place())) { + dst_lodtensor->mutable_data(platform::CUDAPinnedPlace(), + src_lodtensors[0]->type()); + } else { + dst_lodtensor->mutable_data(platform::CPUPlace(), + src_lodtensors[0]->type()); + } + + // slice and memcpy + int begin = 0; + for (auto *src : src_lodtensors) { + int end = begin + src->dims()[0]; + if (end == begin) { + continue; + } + auto dst = dst_lodtensor->Slice(begin, end); + TransData(src, &dst, *dev_ctxes_[src->place()]); + begin = end; + } +} + +void FetchAsyncOpHandle::RunImpl() { + platform::RecordEvent record_event(Name()); + WaitInputVarGenerated(); + + // get src vars + auto &scopes = *local_exec_scopes_; + std::vector src_vars; + src_vars.reserve(inputs_.size()); + for (size_t i = 0; i < inputs_.size(); ++i) { + auto *var_handle = static_cast(inputs_[i]); + auto &scope = scopes.at(var_handle->scope_idx()); + auto *var = scope->FindVar(var_handle->name()); + PADDLE_ENFORCE_NOT_NULL( + var, + platform::errors::NotFound( + "Cannot find variable %s in execution scope.", var_handle->name())); + src_vars.emplace_back(var); + } + + if (return_merged_) { + auto &val = BOOST_GET(FetchList, *data_); + if (src_vars[0]->IsType()) { + // to lodtensor type + std::vector src_lodtensors; + src_lodtensors.reserve(src_vars.size()); + for (size_t i = 0; i < src_vars.size(); ++i) { + src_lodtensors.emplace_back(&src_vars[i]->Get()); + } + + LoDTensor dst_lodtensor; + FetchMergedLodTensor(src_lodtensors, &dst_lodtensor); + val.at(offset_) = std::move(dst_lodtensor); + } else { + // to lodtensorarray type + std::vector src_lodtensor_arrays; + src_lodtensor_arrays.reserve(src_vars.size()); + for (size_t i = 0; i < src_vars.size(); ++i) { + src_lodtensor_arrays.emplace_back( + &src_vars[i]->Get()); + } + + LoDTensorArray dst_lodtensor_array; + 
dst_lodtensor_array.resize(src_lodtensor_arrays[0]->size()); + + for (size_t i = 0; i < dst_lodtensor_array.size(); ++i) { + std::vector src_lodtensors; + src_lodtensors.reserve(src_lodtensor_arrays.size()); + for (size_t j = 0; j < src_lodtensor_arrays.size(); ++j) { + src_lodtensors.emplace_back(&(*src_lodtensor_arrays[j])[i]); + } + FetchMergedLodTensor(src_lodtensors, &dst_lodtensor_array[i]); + } + val.at(offset_) = std::move(dst_lodtensor_array); + } + } else { + auto &val = BOOST_GET(FetchUnmergedList, *data_); + auto &dst_tensors = val.at(offset_); + dst_tensors.reserve(src_vars.size()); + + for (size_t i = 0; i < src_vars.size(); ++i) { + if (src_vars[i]->IsType()) { + auto &t = src_vars[i]->Get(); + LoDTensor item; + TransData(&t, &item, *dev_ctxes_[t.place()]); + dst_tensors.emplace_back(std::move(item)); + } else { + auto &t = src_vars[i]->Get(); + LoDTensorArray item; + item.resize(t.size()); + for (size_t j = 0; j < t.size(); ++j) { + TransData(&t[j], &item[j], *dev_ctxes_[t[j].place()]); + } + dst_tensors.emplace_back(std::move(item)); + } + } + } +} + +bool FetchAsyncOpHandle::IsMultiDeviceTransfer() { return true; } + +std::string FetchAsyncOpHandle::Name() const { return "FetchAsync"; } + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/fetch_async_op_handle.h b/paddle/fluid/framework/details/fetch_async_op_handle.h new file mode 100644 index 0000000000000000000000000000000000000000..691a3286c270badad938610811cc6e73d63c2c04 --- /dev/null +++ b/paddle/fluid/framework/details/fetch_async_op_handle.h @@ -0,0 +1,63 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + +#include "paddle/fluid/framework/details/op_handle_base.h" +#include "paddle/fluid/framework/feed_fetch_type.h" +#include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/platform/device_context.h" + +namespace paddle { +namespace framework { +namespace details { + +struct FetchAsyncOpHandle : public OpHandleBase { + public: + FetchAsyncOpHandle(ir::Node *node, FetchResultType *data, size_t offset, + std::vector *local_scopes, + std::vector *local_exec_scopes, + bool return_merged); + + ~FetchAsyncOpHandle(); + + void RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) override; + + std::string Name() const override; + + bool IsMultiDeviceTransfer() override; + + protected: + void RunImpl() override; + + std::vector GetLocalScopes() override { return *local_scopes_; } + + void FetchMergedLodTensor( + const std::vector &src_lodtensors, + LoDTensor *dst_lodtensor); + + private: + FetchResultType *data_; + size_t offset_; + std::vector *local_scopes_; + std::vector *local_exec_scopes_; + bool return_merged_; +}; + +} // namespace details +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc index 5574a55e18c6d9806cb878dc69ec597f81da97d8..ae69960ef78c3e35143c66226133bd0dceac8b79 100644 --- a/paddle/fluid/framework/details/fetch_op_handle.cc +++ b/paddle/fluid/framework/details/fetch_op_handle.cc @@ -36,7 +36,8 @@ FetchOpHandle::FetchOpHandle(ir::Node *node, FetchResultType *data, FetchOpHandle::~FetchOpHandle() {} void FetchOpHandle::RecordWaitEventOnCtx(platform::DeviceContext *waited_ctx) { - PADDLE_THROW("Nobody should wait FetchOp. Unexpceted Error"); + PADDLE_THROW(platform::errors::PermissionDenied( + "No nodes need to wait FetchOp. 
Unexpceted Error.")); } static void CheckDims(const framework::DDim &tensor_dims, diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index 956b099e883f9ea6d96db8716cb0fa693a3796d4..0ad84f5890acaf1c793000859ed3fbc7c1fc22d3 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -167,6 +167,8 @@ static void PrintNanInf(const T* value, const size_t numel, int print_num, // more detail see: 180 page of // https://www.openmp.org/wp-content/uploads/OpenMP4.0.0.pdf #pragma omp declare reduction(+ : paddle::platform::float16 : omp_out += omp_in) +#pragma omp declare reduction(+ : paddle::platform::bfloat16 : omp_out += \ + omp_in) #endif template @@ -205,6 +207,21 @@ void CheckNanInf( PrintNanInf(value, numel, print_num, op_type, var_name); } } + +template <> +void CheckNanInf( + const paddle::platform::bfloat16* value, const size_t numel, int print_num, + const std::string& op_type, const std::string& var_name) { + float sum = 0.0f; +#pragma omp parallel for reduction(+ : sum) + for (size_t i = 0; i < numel; ++i) { + sum += static_cast(value[i] - value[i]); + } + + if (std::isnan(sum) || std::isinf(sum)) { + PrintNanInf(value, numel, print_num, op_type, var_name); + } +} #endif template <> diff --git a/paddle/fluid/framework/details/ssa_graph_executor.cc b/paddle/fluid/framework/details/ssa_graph_executor.cc index 4f1e44ca26cb65468da6eded74653f34dbf00336..71123f708e3ca149d9fd634f55652cede5a57b50 100644 --- a/paddle/fluid/framework/details/ssa_graph_executor.cc +++ b/paddle/fluid/framework/details/ssa_graph_executor.cc @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/fluid/framework/details/ssa_graph_executor.h" +#include "paddle/fluid/framework/details/fetch_async_op_handle.h" namespace paddle { namespace framework { @@ -23,9 +24,11 @@ void ClearFetchOp(ir::Graph* graph, std::vector* fetch_ops) { if (fetch_ops->empty()) return; for (auto& op : *fetch_ops) { - PADDLE_ENFORCE_NOT_NULL( - dynamic_cast(op), - "The input ops of ClearFetchOp function should be FetchOpHandle."); + PADDLE_ENFORCE_EQ(dynamic_cast(op) != nullptr || + dynamic_cast(op) != nullptr, + true, + "The input ops of ClearFetchOp function should be " + "FetchOpHandle or FetchAsyncOpHandle."); for (auto& out_var : op->Node()->outputs) { graph->RemoveNode(out_var); } diff --git a/paddle/fluid/framework/dlpack_tensor.cc b/paddle/fluid/framework/dlpack_tensor.cc index 180b33d0cb72e2c4c9e6e8caff9f0ef5f1b04689..915589b3242b7d5675e630aca7310185fd109ec2 100644 --- a/paddle/fluid/framework/dlpack_tensor.cc +++ b/paddle/fluid/framework/dlpack_tensor.cc @@ -23,6 +23,7 @@ template static ::DLDataType GetDLDataTypeCode() { ::DLDataType dtype; if (std::is_same::value || + std::is_same::value || std::is_floating_point::value) { dtype.code = kDLFloat; } else if (std::is_unsigned::value) { diff --git a/paddle/fluid/framework/fleet/fleet_wrapper.cc b/paddle/fluid/framework/fleet/fleet_wrapper.cc index cdf210d661c73e69e125c0ebfa85cc852360e352..34fff042770c5f50a280408d8f7f925488b3879c 100644 --- a/paddle/fluid/framework/fleet/fleet_wrapper.cc +++ b/paddle/fluid/framework/fleet/fleet_wrapper.cc @@ -857,7 +857,7 @@ void FleetWrapper::PushSparseVarsWithLabelAsync( float* g = g_tensor->data(); if (scale_sparse_gradient_with_batch_size_ && grad_dim > 0) { - int dim = emb_dim + offset; + int dim = emb_dim; Eigen::Map< Eigen::Matrix> g_mat(g, g_tensor->numel() / dim, dim); diff --git 
a/paddle/fluid/framework/generator.cc b/paddle/fluid/framework/generator.cc index 9bde9e20b19a0b14ce4489b91d9ab3d5273f7f9a..d51e97d98e902a87cd2a44d2019e93e8dfc30fc8 100644 --- a/paddle/fluid/framework/generator.cc +++ b/paddle/fluid/framework/generator.cc @@ -21,10 +21,46 @@ limitations under the License. */ #include #include #include +#include + +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/gpu_info.h" +#include "paddle/fluid/platform/place.h" namespace paddle { namespace framework { +const std::shared_ptr& GetDefaultCUDAGenerator(int64_t device_id) { +#ifdef PADDLE_WITH_CUDA + + static int64_t num_cuda_devices = -1; + static std::once_flag num_devices_init_flag; + static std::deque cuda_device_flags; + static std::vector> default_cuda_generators; + + std::call_once(num_devices_init_flag, []() { + num_cuda_devices = paddle::platform::GetCUDADeviceCount(); + cuda_device_flags.resize(num_cuda_devices); + default_cuda_generators.resize(num_cuda_devices); + }); + if (device_id < 0) { + PADDLE_THROW(platform::errors::InvalidArgument( + "cuda device id shoule be greater than 0")); + } + + std::call_once(cuda_device_flags[device_id], [device_id]() { + default_cuda_generators[device_id] = + std::make_shared(GetRandomSeed(), device_id); + VLOG(4) << "initial seed: " + << default_cuda_generators[device_id]->GetCurrentSeed(); + }); + return default_cuda_generators[device_id]; +#else + PADDLE_THROW(platform::errors::PermissionDenied( + "getDefaultCUDAGenerator only support in CUDA place")); +#endif +} + const std::shared_ptr& DefaultCPUGenerator() { static auto default_cpu_generator = std::make_shared(GetRandomSeed()); @@ -103,6 +139,7 @@ uint64_t Generator::Seed() { void Generator::SetCurrentSeed(uint64_t seed) { std::lock_guard lock(this->mu_); this->state_.current_seed = seed; + this->state_.thread_offset = 0; std::seed_seq seq({seed}); this->engine_->seed(seq); } @@ -123,6 +160,22 @@ uint64_t Generator::Random64() { return (*engine)(); } +std::pair Generator::IncrementOffset( + uint64_t increament_offset) { + uint64_t cur_offset = this->state_.thread_offset; +#ifdef PADDLE_WITH_CUDA + std::lock_guard lock(this->mu_); + + this->state_.thread_offset += increament_offset; + +#else + PADDLE_THROW(platform::errors::PermissionDenied( + "Increment Offset only support in CUDA place")); +#endif + return std::make_pair(static_cast(this->state_.current_seed), + cur_offset); +} + void Generator::SetIsInitPy(bool is_init_py) { this->is_init_py_ = is_init_py; VLOG(4) << "SetIsInitPy:" << this->is_init_py_; diff --git a/paddle/fluid/framework/generator.h b/paddle/fluid/framework/generator.h index 82b35f7ad550e770e8d10457ddf6cdf8e6fbd709..a279c2e4e1458293b6579b7b7cb2111e440e5d5e 100644 --- a/paddle/fluid/framework/generator.h +++ b/paddle/fluid/framework/generator.h @@ -38,6 +38,7 @@ static uint64_t GetRandomSeed() { struct GeneratorState { int64_t device = -1; uint64_t current_seed = 34342423252; + uint64_t thread_offset = 0; std::mt19937_64 cpu_engine; }; @@ -49,6 +50,7 @@ struct Generator { this->state_.cpu_engine = *engine; this->state_.device = -1; this->state_.current_seed = seed; + this->state_.thread_offset = 0; this->engine_ = engine; VLOG(4) << "initial seed: " << this->state_.current_seed << ", cpu engine: " << &this->state_.cpu_engine; @@ -59,11 +61,25 @@ struct Generator { this->state_.cpu_engine = *engine; this->state_.device = -1; this->state_.current_seed = seed; + this->state_.thread_offset = 0; this->engine_ = engine; VLOG(4) << "initial seed: " << 
this->state_.current_seed << ", cpu engine: " << &this->state_.cpu_engine; this->is_init_py_ = true; // TODO(zhiqiu): remove it in future } + Generator(uint64_t seed, uint64_t device_id) { + std::seed_seq seq({seed}); + auto engine = std::make_shared(seq); + this->state_.cpu_engine = *engine; + this->state_.device = device_id; + this->state_.current_seed = seed; + this->state_.thread_offset = 0; + this->engine_ = engine; + VLOG(4) << "initial seed: " << this->state_.current_seed + << ", cpu engine: " << &this->state_.cpu_engine; + this->is_init_py_ = false; // TODO(zhiqiu): remove it in future + } + Generator(const Generator& other) = delete; // get random state @@ -83,8 +99,11 @@ struct Generator { uint64_t Random64(); + std::pair IncrementOffset(uint64_t increament_offset); + void SetIsInitPy(bool); bool GetIsInitPy() const; + uint64_t get_device_id() { return this->state_.device; } private: GeneratorState state_; @@ -105,5 +124,8 @@ std::shared_ptr OpDefaultCPUEngine(); std::shared_ptr GetCPURandomEngine(uint64_t); +const std::shared_ptr& GetDefaultCUDAGenerator( + int64_t device_id = -1); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/framework/op_version_registry.h b/paddle/fluid/framework/op_version_registry.h index 79b15fc87d0b0a0ade8324710b80af634ff8878f..5edd70e035f98f408c0104297e084771cd158f53 100644 --- a/paddle/fluid/framework/op_version_registry.h +++ b/paddle/fluid/framework/op_version_registry.h @@ -133,6 +133,9 @@ class OpVersion { checkpoints_.push_back(Checkpoint({note, op_version_desc})); return *this; } + uint32_t GetVersionID() const { + return static_cast(checkpoints_.size()); + } private: struct Checkpoint { @@ -156,6 +159,14 @@ class OpVersionRegistrar { op_version_map_.insert({op_type, OpVersion()}); return op_version_map_[op_type]; } + uint32_t GetVersionID(const std::string& op_type) const { + auto it = op_version_map_.find(op_type); + if (it == op_version_map_.end()) { + return 0; + } + + return it->second.GetVersionID(); + } private: std::unordered_map op_version_map_; @@ -164,6 +175,125 @@ class OpVersionRegistrar { OpVersionRegistrar& operator=(const OpVersionRegistrar&) = delete; }; +class OpVersionComparator { + public: + virtual bool operator()() = 0; + virtual ~OpVersionComparator() = default; +}; + +#define ADD_OP_VERSION_COMPARATOR(cmp_name, cmp_math) \ + class OpVersion##cmp_name##Comparator : public OpVersionComparator { \ + public: \ + explicit OpVersion##cmp_name##Comparator(const std::string op_name, \ + uint32_t target_version) \ + : op_name_(op_name), target_version_(target_version) {} \ + virtual bool operator()() { \ + return OpVersionRegistrar::GetInstance().GetVersionID(op_name_) \ + cmp_math target_version_; \ + } \ + virtual ~OpVersion##cmp_name##Comparator() {} \ + \ + private: \ + std::string op_name_; \ + uint32_t target_version_; \ + }; + +ADD_OP_VERSION_COMPARATOR(LE, <=); +ADD_OP_VERSION_COMPARATOR(EQ, ==); +ADD_OP_VERSION_COMPARATOR(GE, >=); +ADD_OP_VERSION_COMPARATOR(NE, !=); + +class OpVersionComparatorCombination { + public: + OpVersionComparatorCombination() {} + + OpVersionComparatorCombination& LE(const std::string& op_name, + int target_version) { + op_version_comparators_.push_back(std::shared_ptr( + new OpVersionLEComparator(op_name, target_version))); + return *this; + } + OpVersionComparatorCombination& EQ(const std::string& op_name, + int target_version) { + op_version_comparators_.push_back(std::shared_ptr( + new OpVersionEQComparator(op_name, target_version))); + return *this; + } + 
OpVersionComparatorCombination& GE(const std::string& op_name, + int target_version) { + op_version_comparators_.push_back(std::shared_ptr( + new OpVersionGEComparator(op_name, target_version))); + return *this; + } + OpVersionComparatorCombination& NE(const std::string& op_name, + int target_version) { + op_version_comparators_.push_back(std::shared_ptr( + new OpVersionNEComparator(op_name, target_version))); + return *this; + } + + bool IsMatched() const { + for (const auto& cmp : op_version_comparators_) { + if (!(*cmp)()) { + return false; + } + } + return true; + } + + private: + std::vector> op_version_comparators_; +}; + +class PassVersionCheckers { + public: + PassVersionCheckers& AddCombination( + const OpVersionComparatorCombination& combinations) { + pass_version_checkers_.push_back(combinations); + return *this; + } + bool IsPassCompatible() const { + if (pass_version_checkers_.empty()) { + return true; + } + for (const auto& checker : pass_version_checkers_) { + if (checker.IsMatched()) { + return true; + } + } + return false; + } + + private: + std::vector pass_version_checkers_; +}; + +class PassVersionCheckerRegistrar { + public: + static PassVersionCheckerRegistrar& GetInstance() { + static PassVersionCheckerRegistrar instance; + return instance; + } + PassVersionCheckers& Register(const std::string& pass_name) { + return pass_version_checkers_map_[pass_name]; + } + bool IsPassCompatible(const std::string& fuse_pass_name) const { + auto iter = pass_version_checkers_map_.find(fuse_pass_name); + if (iter == pass_version_checkers_map_.end()) { + return true; + } + return iter->second.IsPassCompatible(); + } + + private: + std::unordered_map + pass_version_checkers_map_; + + PassVersionCheckerRegistrar() = default; + PassVersionCheckerRegistrar& operator=(const PassVersionCheckerRegistrar&) = + delete; +}; + } // namespace compatible } // namespace framework } // namespace paddle @@ -173,3 +303,9 @@ class OpVersionRegistrar { RegisterOpVersion__##op_type = \ paddle::framework::compatible::OpVersionRegistrar::GetInstance() \ .Register(#op_type) + +#define REGISTER_PASS_CAPABILITY(pass_name) \ + static auto RegisterOpPassVersionChecker__##pass_name = \ + paddle::framework::compatible::PassVersionCheckerRegistrar:: \ + GetInstance() \ + .Register(#pass_name) diff --git a/paddle/fluid/framework/op_version_registry_test.cc b/paddle/fluid/framework/op_version_registry_test.cc index 80ad51ad07b5a84cfabb3ace9b478b1f6ea24f95..239dbc4357854a8962567129b259a64260308b49 100644 --- a/paddle/fluid/framework/op_version_registry_test.cc +++ b/paddle/fluid/framework/op_version_registry_test.cc @@ -55,6 +55,72 @@ TEST(test_operator_version, test_operator_version) { .NewInput("X2", "The second input.") .NewOutput("Y2", "The second output.")); } + +TEST(test_pass_op_version_checker, test_pass_op_version_checker) { + ASSERT_TRUE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "no_bind_pass")); + + REGISTER_PASS_CAPABILITY(test_pass1) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .LE("mul", 1) + .EQ("fc", 0)); + ASSERT_TRUE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass1")); + + REGISTER_PASS_CAPABILITY(test_pass2) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .GE("mul", 0) + .NE("fc", 0)); + ASSERT_FALSE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass2")); + + REGISTER_PASS_CAPABILITY(test_pass3) + .AddCombination( + 
paddle::framework::compatible::OpVersionComparatorCombination() + .GE("mul", 0) + .NE("fc", 0)) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .LE("mul", 1) + .EQ("fc", 0)); + ASSERT_TRUE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass3")); + + REGISTER_PASS_CAPABILITY(test_pass4) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .GE("test__", 5) + .EQ("fc", 0)); + ASSERT_FALSE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass4")); + + REGISTER_PASS_CAPABILITY(test_pass5) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .GE("test__", 4) + .EQ("fc", 0)); + ASSERT_TRUE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass5")); + + REGISTER_PASS_CAPABILITY(test_pass6) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("test__", 4) + .EQ("fc", 0)); + ASSERT_TRUE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass6")); + + REGISTER_PASS_CAPABILITY(test_pass7) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .NE("test__", 4) + .EQ("fc", 0)); + ASSERT_FALSE(PassVersionCheckerRegistrar::GetInstance().IsPassCompatible( + "test_pass7")); +} + } // namespace compatible } // namespace framework } // namespace paddle diff --git a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc index 6fbf880356c541e72cae6f3b03efe017042254ff..9eb8478515727cf04f9d16e9a38a8f4c3ec9c683 100644 --- a/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc +++ b/paddle/fluid/inference/analysis/passes/memory_optimize_pass.cc @@ -90,32 +90,6 @@ void MemoryOptimizePass::CollectLifeCycle( } } -// TODO(Superjomn) Make this a general help method. 
-int DataTypeToSpace(framework::proto::VarType_Type type) { - switch (type) { - case framework::proto::VarType_Type_BOOL: - return sizeof(bool); - case framework::proto::VarType_Type_FP32: - return sizeof(float); - case framework::proto::VarType_Type_INT32: - return sizeof(int32_t); - case framework::proto::VarType_Type_INT64: - return sizeof(int64_t); - case framework::proto::VarType_Type_INT16: - return sizeof(int16_t); - case framework::proto::VarType_Type_FP16: - return sizeof(int16_t); - case framework::proto::VarType_Type_FP64: - return sizeof(double); - case framework::proto::VarType_Type_UINT8: - return sizeof(unsigned char); - case framework::proto::VarType_Type_INT8: - return sizeof(int8_t); - default: - PADDLE_THROW("Unknown data type"); - } -} - void MemoryOptimizePass::CollectVarMemorySize( space_table_t* space_table) const { const int fake_batch_size = 1; @@ -163,7 +137,7 @@ void MemoryOptimizePass::CollectVarMemorySize( int size = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies()); (*space_table)[node->Var()->Name()] = - size * DataTypeToSpace(node->Var()->GetDataType()); + size * paddle::framework::SizeOfType(node->Var()->GetDataType()); } } } diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 127a41aee890808258367fb40804a9547b8fdbb0..500aa8341d6a61056f6f80f82c6f28bb569eb772 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1058,6 +1058,7 @@ USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm); USE_TRT_CONVERTER(skip_layernorm); USE_TRT_CONVERTER(slice); USE_TRT_CONVERTER(scale); +USE_TRT_CONVERTER(stack); #endif namespace paddle_infer { diff --git a/paddle/fluid/inference/lite/test_engine.cc b/paddle/fluid/inference/lite/test_engine.cc index 325c7ab2539f28f5145ee88a1bbf374f333348e1..d29bcb76be78f151dc606d9f335e9df9ed19b16b 100644 --- a/paddle/fluid/inference/lite/test_engine.cc +++ b/paddle/fluid/inference/lite/test_engine.cc @@ -14,15 +14,16 @@ #include -#include "paddle/fluid/inference/lite/engine.h" #include "paddle/fluid/inference/utils/singleton.h" -#include "paddle/fluid/operators/lite/ut_helper.h" #include "paddle/fluid/framework/block_desc.h" #include "paddle/fluid/framework/op_desc.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/inference/lite/engine.h" +#include "paddle/fluid/operators/lite/ut_helper.h" + namespace paddle { namespace inference { namespace lite { diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt index 8b7371490c09068fd4b84ddb541014204806a2b2..39d02909abd1f1d96f73cc9f3e3ea9d26a1f5c72 100644 --- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt @@ -3,8 +3,8 @@ nv_library(tensorrt_converter SRCS mul_op.cc conv2d_op.cc fc_op.cc pool2d_op.cc elementwise_op.cc batch_norm_op.cc activation_op.cc softmax_op.cc concat_op.cc dropout_op.cc pad_op.cc split_op.cc prelu_op.cc leaky_relu_op.cc gelu_op.cc layer_norm_op.cc multihead_matmul_op.cc - shuffle_channel_op.cc swish_op.cc instance_norm_op.cc -emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc + shuffle_channel_op.cc swish_op.cc instance_norm_op.cc stack_op.cc + emb_eltwise_layernorm.cc skip_layernorm.cc scale_op.cc slice_op.cc hard_sigmoid_op.cc hard_swish_op.cc DEPS tensorrt_engine 
tensorrt_plugin operator scope framework_proto op_registry) nv_test(test_op_converter SRCS test_op_converter.cc DEPS diff --git a/paddle/fluid/inference/tensorrt/convert/scale_op.cc b/paddle/fluid/inference/tensorrt/convert/scale_op.cc index 19e1895635aa7670a0ca453656c3407d132e8db4..f9a1fe41ddc046aad8cc3a5397453b0f68c1a112 100644 --- a/paddle/fluid/inference/tensorrt/convert/scale_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/scale_op.cc @@ -58,6 +58,24 @@ class ScaleOpConverter : public OpConverter { TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; nvinfer1::ILayer* layer = nullptr; + + auto input_dim = input->getDimensions(); + PADDLE_ENFORCE_GE(input_dim.nbDims, 3, + platform::errors::Fatal( + "Paddle-TRT scale mode only support dimension >= 3")); + + nvinfer1::IShuffleLayer* expand_layer = nullptr; + nvinfer1::IShuffleLayer* squeeze_layer = nullptr; + + if (input_dim.nbDims == 3) { + // TensorRT scale layer is not supporting input dims < 4 when using + // explicit batch + expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input); + nvinfer1::Dims4 target_shape(0, 0, 0, 1); // expand 1 dims + expand_layer->setReshapeDimensions(target_shape); + input = expand_layer->getOutput(0); + } + if (bias_after_scale) { layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *input, nvinfer1::ScaleMode::kUNIFORM, @@ -73,6 +91,18 @@ class ScaleOpConverter : public OpConverter { power_weights.get(), scale_weights.get(), power_weights.get()); } + PADDLE_ENFORCE_EQ(layer != nullptr, true, + platform::errors::Fatal("Create scale layer failed.")); + + if (input_dim.nbDims == 3) { + // TensorRT scale layer is not supporting input dims < 4 when using + // explicit batch + squeeze_layer = + TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0))); + nvinfer1::Dims3 target_shape(0, 0, 0); // expand 1 dims + squeeze_layer->setReshapeDimensions(target_shape); + layer = static_cast(squeeze_layer); + } RreplenishLayerAndOutput(layer, "scale", {out_name}, test_mode); } }; diff --git a/paddle/fluid/inference/tensorrt/convert/stack_op.cc b/paddle/fluid/inference/tensorrt/convert/stack_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..f35024529c61a253f314e5eca985713227d3f343 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/convert/stack_op.cc @@ -0,0 +1,75 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/inference/tensorrt/convert/op_converter.h" +#include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h" + +namespace paddle { +namespace inference { +namespace tensorrt { + +/* + * Stack converter from fluid to tensorRT. 
+ */ +class StackOpConverter : public OpConverter { + public: + void operator()(const framework::proto::OpDesc& op, + const framework::Scope& scope, bool test_mode) override { + VLOG(4) << "convert fluid stack op to tensorrt stack layer"; + + framework::OpDesc op_desc(op, nullptr); + auto input = op_desc.Input("X"); + int input_num = input.size(); + nvinfer1::ITensor** inputs = + (nvinfer1::ITensor**)malloc(input_num * sizeof(nvinfer1::ITensor*)); + + for (int i = 0; i < input_num; ++i) { + inputs[i] = engine_->GetITensor(input[i]); + } + + int axis = BOOST_GET_CONST(int, op_desc.GetAttr("axis")); + if (axis < 0) { + axis = axis + inputs[0]->getDimensions().nbDims + 1; + } + + nvinfer1::ILayer* layer = nullptr; + if (engine_->with_dynamic_shape()) { +#if IS_TRT_VERSION_GE(6000) + plugin::StackPluginDynamic* plugin = + new plugin::StackPluginDynamic(axis, input_num); + layer = engine_->AddPluginV2(inputs, input_num, plugin); + assert(layer != nullptr); +#else + PADDLE_THROW(platform::errors::Fatal( + "You are running the TRT Dynamic Shape mode, need to confirm that " + "your TRT version is no less than 6.0")); +#endif + } else { + PADDLE_THROW(platform::errors::Fatal( + "You are running the Ernie(Bert) model in static" + "shape mode, which is not supported for the time being.\n" + "You can use the config.SetTRTDynamicShapeInfo(...) interface" + " to set the shape information to run the dynamic shape mode.")); + } + auto output_name = op_desc.Output("Y").front(); + RreplenishLayerAndOutput(layer, "stack", {output_name}, test_mode); + free(inputs); + } +}; + +} // namespace tensorrt +} // namespace inference +} // namespace paddle + +REGISTER_TRT_OP_CONVERTER(stack, StackOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index e8cbb9431cb3c79ed6c6269f96c256fd50afb121..a5b71356d0eca43555f4190b8cac2055a3eb679c 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -88,6 +88,7 @@ struct SimpleOpTypeSetTeller : public Teller { "gelu", "layer_norm", "scale", + "stack", }; }; diff --git a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt index e417fcbb2ce9267ad491996063e5725799815f55..98afdbe254a4b0a086d4a4aa88096a06c40138d1 100644 --- a/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt +++ b/paddle/fluid/inference/tensorrt/plugin/CMakeLists.txt @@ -1,7 +1,8 @@ nv_library(tensorrt_plugin SRCS trt_plugin.cc split_op_plugin.cu elementwise_op_plugin.cu - prelu_op_plugin.cu trt_plugin_factory.cc gelu_op_plugin.cu + prelu_op_plugin.cu trt_plugin_factory.cc gelu_op_plugin.cu pool_op_plugin.cu swish_op_plugin.cu layer_norm_op_plugin.cu -instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu -qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu hard_swish_op_plugin.cu - DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) + instance_norm_op_plugin.cu emb_eltwise_layernorm_plugin.cu + qkv_to_context_plugin.cu skip_layernorm_op_plugin.cu slice_op_plugin.cu + hard_swish_op_plugin.cu stack_op_plugin.cu + DEPS enforce tensorrt_engine prelu tensor bert_encoder_functor) diff --git a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu index 48afcfce347d681fbbb291e478ead1fa28475a22..1fa5b3228e1158fe0423c457d974e0bbf970a30f 100644 --- a/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu +++ 
b/paddle/fluid/inference/tensorrt/plugin/pool_op_plugin.cu @@ -104,32 +104,51 @@ nvinfer1::DimsExprs PoolPluginDynamic::getOutputDimensions( auto stri_0 = expr_builder.constant(strides_[0]); auto stri_1 = expr_builder.constant(strides_[1]); + auto one_value = expr_builder.constant(1); - auto tmp1_0 = - expr_builder.constant((-ksize_[0] + 2 * paddings_[0]) / strides_[0] + 1); - auto tmp1_1 = - expr_builder.constant((-ksize_[1] + 2 * paddings_[1]) / strides_[1] + 1); + auto v0_tmp = expr_builder.constant(-ksize_[0] + 2 * paddings_[0]); + auto v1_tmp = expr_builder.constant(-ksize_[1] + 2 * paddings_[1]); - auto tmp2_0 = expr_builder.constant( - (-ksize_[0] + 2 * paddings_[0] + strides_[0] - 1) / strides_[0] + 1); - auto tmp2_1 = expr_builder.constant( - (-ksize_[1] + 2 * paddings_[1] + strides_[1] - 1) / strides_[1] + 1); - - auto *a_d = expr_builder.operation(nvinfer1::DimensionOperation::kCEIL_DIV, - *inputs[0].d[2], *stri_0); - auto *b_d = expr_builder.operation(nvinfer1::DimensionOperation::kCEIL_DIV, - *inputs[0].d[3], *stri_1); + auto ceil_tmp = + expr_builder.constant(-ksize_[0] + 2 * paddings_[0] + strides_[0] - 1); + auto ceil1_tmp = + expr_builder.constant(-ksize_[1] + 2 * paddings_[1] + strides_[1] - 1); if (!ceil_mode_) { - output.d[2] = expr_builder.operation(nvinfer1::DimensionOperation::kSUM, - *a_d, *tmp1_0); - output.d[3] = expr_builder.operation(nvinfer1::DimensionOperation::kSUM, - *b_d, *tmp1_1); + output.d[2] = expr_builder.operation( + nvinfer1::DimensionOperation::kSUM, + *expr_builder.operation( + nvinfer1::DimensionOperation::kFLOOR_DIV, + *expr_builder.operation(nvinfer1::DimensionOperation::kSUM, + *inputs[0].d[2], *v0_tmp), + *stri_0), + *one_value); + output.d[3] = expr_builder.operation( + nvinfer1::DimensionOperation::kSUM, + *expr_builder.operation( + nvinfer1::DimensionOperation::kFLOOR_DIV, + *expr_builder.operation(nvinfer1::DimensionOperation::kSUM, + *inputs[0].d[3], *v1_tmp), + *stri_1), + *one_value); + } else { - output.d[2] = expr_builder.operation(nvinfer1::DimensionOperation::kSUM, - *a_d, *tmp2_0); - output.d[3] = expr_builder.operation(nvinfer1::DimensionOperation::kSUM, - *b_d, *tmp2_1); + output.d[2] = expr_builder.operation( + nvinfer1::DimensionOperation::kSUM, + *expr_builder.operation( + nvinfer1::DimensionOperation::kFLOOR_DIV, + *expr_builder.operation(nvinfer1::DimensionOperation::kSUM, + *inputs[0].d[2], *ceil_tmp), + *stri_0), + *one_value); + output.d[3] = expr_builder.operation( + nvinfer1::DimensionOperation::kSUM, + *expr_builder.operation( + nvinfer1::DimensionOperation::kFLOOR_DIV, + *expr_builder.operation(nvinfer1::DimensionOperation::kSUM, + *inputs[0].d[3], *ceil1_tmp), + *stri_1), + *one_value); } return output; diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu new file mode 100644 index 0000000000000000000000000000000000000000..1ecbf4be154f01059ef33e2d510d8329d6726314 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.cu @@ -0,0 +1,247 @@ +// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include "paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h" +#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin_factory.h" + +namespace paddle { +namespace inference { +namespace tensorrt { +namespace plugin { + +#if IS_TRT_VERSION_GE(6000) +StackPluginDynamic::StackPluginDynamic(int axis, int num_stack) + : axis_(axis), num_stack_(num_stack) {} + +StackPluginDynamic::StackPluginDynamic(void const* serial_data, + size_t serial_length) { + DeserializeValue(&serial_data, &serial_length, &axis_); + DeserializeValue(&serial_data, &serial_length, &num_stack_); +} + +StackPluginDynamic::~StackPluginDynamic() {} + +nvinfer1::IPluginV2DynamicExt* StackPluginDynamic::clone() const { + return new StackPluginDynamic(axis_, num_stack_); +} + +const char* StackPluginDynamic::getPluginType() const { return "stack_plugin"; } + +int StackPluginDynamic::getNbOutputs() const { return 1; } + +int StackPluginDynamic::initialize() { return 0; } + +size_t StackPluginDynamic::getSerializationSize() const { + size_t serialize_size = 0; + serialize_size += SerializedSize(axis_); + serialize_size += SerializedSize(num_stack_); + return serialize_size; +} + +void StackPluginDynamic::serialize(void* buffer) const { + SerializeValue(&buffer, axis_); + SerializeValue(&buffer, num_stack_); +} + +nvinfer1::DimsExprs StackPluginDynamic::getOutputDimensions( + int output_index, const nvinfer1::DimsExprs* inputs, int nb_inputs, + nvinfer1::IExprBuilder& expr_builder) { + nvinfer1::DimsExprs output(inputs[0]); + output.nbDims = inputs[0].nbDims + 1; + + for (int i = inputs[0].nbDims; i > axis_; --i) { + output.d[i] = inputs[0].d[i - 1]; + } + output.d[axis_] = expr_builder.constant(nb_inputs); + return output; +} + +void StackPluginDynamic::configurePlugin( + const nvinfer1::DynamicPluginTensorDesc* in, int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, int nbOutputs) {} + +size_t StackPluginDynamic::getWorkspaceSize( + const nvinfer1::PluginTensorDesc* inputs, int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, int nbOutputs) const { + return num_stack_ * sizeof(uintptr_t); +} + +void StackPluginDynamic::destroy() { delete this; } + +void StackPluginDynamic::terminate() {} + +bool StackPluginDynamic::supportsFormatCombination( + int pos, const nvinfer1::PluginTensorDesc* in_out, int nb_inputs, + int nb_outputs) { + PADDLE_ENFORCE_NOT_NULL( + in_out, platform::errors::InvalidArgument( + "The input of stack plugin should not be nullptr.")); + + PADDLE_ENFORCE_LT( + pos, nb_inputs + nb_outputs, + platform::errors::InvalidArgument("The pos(%d) should be less than the " + "num(%d) of the input and the output.", + pos, nb_inputs + nb_outputs)); + + const nvinfer1::PluginTensorDesc& in = in_out[pos]; + if (pos == 0) { +#ifdef SUPPORTS_CUDA_FP16 + return (in.type == nvinfer1::DataType::kFLOAT || + in.type == nvinfer1::DataType::kHALF) && + (in.format == nvinfer1::TensorFormat::kLINEAR); +#else + return (in.type == nvinfer1::DataType::kFLOAT) && + (in.format == nvinfer1::TensorFormat::kLINEAR); +#endif + } + const nvinfer1::PluginTensorDesc& prev = 
in_out[pos - 1]; + // output + return in.type == prev.type && in.format == prev.format; +} + +nvinfer1::DataType StackPluginDynamic::getOutputDataType( + int index, const nvinfer1::DataType* input_types, int nb_inputs) const { + PADDLE_ENFORCE_EQ(index, 0, platform::errors::InvalidArgument( + "The index should be equal to 0")); + return input_types[0]; +} + +template +__global__ void StackKernel(const T* const* input, T* output, int num_stack, + int base_unit) { + int stack_id = blockIdx.x; + int lead_id = blockIdx.y; + + for (int i = threadIdx.x; i < base_unit; i += blockDim.x) { + output[lead_id * num_stack * base_unit + stack_id * base_unit + i] = + input[stack_id][lead_id * base_unit + i]; + } +} + +int StackPluginDynamic::enqueue(const nvinfer1::PluginTensorDesc* input_desc, + const nvinfer1::PluginTensorDesc* output_desc, + const void* const* inputs, void* const* outputs, + void* workspace, cudaStream_t stream) { + auto input_dims = input_desc[0].dims; // (batch, seq, seq) + auto out_dims = output_desc[0].dims; // (batch, num_head, seq, seq) + auto out_num_dims = out_dims.nbDims; + + int base_unit = 1; + for (int i = axis_ + 1; i < out_num_dims; ++i) { + PADDLE_ENFORCE_GT(out_dims.d[i], 0, + platform::errors::InvalidArgument( + "Input dimensions should be greater than 0")); + base_unit *= out_dims.d[i]; + } + + int lead_unit = 1; + for (int i = 0; i < axis_; ++i) { + PADDLE_ENFORCE_GT(out_dims.d[i], 0, + platform::errors::InvalidArgument( + "Input dimensions should be greater than 0")); + lead_unit *= out_dims.d[i]; + } + + PADDLE_ENFORCE_EQ( + out_dims.d[axis_], num_stack_, + platform::errors::InvalidArgument("number of stack axis should be same")); + + cudaMemcpyAsync(workspace, reinterpret_cast(inputs), + sizeof(void*) * out_dims.d[axis_], cudaMemcpyHostToDevice, + stream); + + const int num_stacks = out_dims.d[axis_]; + dim3 num_blocks(num_stacks, lead_unit); + const int num_threads = 256; + auto infer_type = input_desc[0].type; + + if (infer_type == nvinfer1::DataType::kFLOAT) { + float* output = static_cast(outputs[0]); + StackKernel<<>>( + reinterpret_cast(workspace), output, num_stacks, + base_unit); + } else if (infer_type == nvinfer1::DataType::kHALF) { +#ifdef SUPPORTS_CUDA_FP16 + __half* output = static_cast<__half*>(outputs[0]); + StackKernel<__half><<>>( + reinterpret_cast(workspace), output, num_stacks, + base_unit); +#else + PADDLE_THROW(platform::errors::Fatal( + "The cuda archs you specific should greater than 600.")); +#endif + } else { + PADDLE_THROW( + platform::errors::Fatal("The Stack TRT Plugin's input type only " + "support float or half currently.")); + } + return cudaGetLastError() != cudaSuccess; +} + +StackPluginDynamicCreator::StackPluginDynamicCreator() {} + +const char* StackPluginDynamicCreator::getPluginName() const { + return "stack_plugin"; +} + +const char* StackPluginDynamicCreator::getPluginVersion() const { return "1"; } + +const nvinfer1::PluginFieldCollection* +StackPluginDynamicCreator::getFieldNames() { + return &field_collection_; +} + +nvinfer1::IPluginV2* StackPluginDynamicCreator::createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) { + int axis = -1; + int num_stack = -1; + + for (int i = 0; i < fc->nbFields; ++i) { + const std::string name(fc->fields[i].name); + if (name == "axis") { + axis = static_cast(fc->fields[i].data)[0]; + } else if (name == "num_stack") { + num_stack = static_cast(fc->fields[i].data)[0]; + } else { + PADDLE_THROW(platform::errors::Fatal("Meet an unknown plugin field '" + + name + + "' 
when creating stack op plugin.")); + } + } + return new StackPluginDynamic(axis, num_stack); +} + +nvinfer1::IPluginV2* StackPluginDynamicCreator::deserializePlugin( + const char* name, const void* serial_data, size_t serial_length) { + auto plugin = new StackPluginDynamic(serial_data, serial_length); + return plugin; +} + +void StackPluginDynamicCreator::setPluginNamespace(const char* lib_namespace) { + plugin_namespace_ = lib_namespace; +} + +const char* StackPluginDynamicCreator::getPluginNamespace() const { + return plugin_namespace_.c_str(); +} + +#endif + +} // namespace plugin +} // namespace tensorrt +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h new file mode 100644 index 0000000000000000000000000000000000000000..f4f6cde6f87ea97c514e68bc2862bb163b0aa448 --- /dev/null +++ b/paddle/fluid/inference/tensorrt/plugin/stack_op_plugin.h @@ -0,0 +1,96 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include +#include +#include "paddle/fluid/framework/tensor.h" +#include "paddle/fluid/inference/tensorrt/plugin/trt_plugin.h" + +namespace paddle { +namespace inference { +namespace tensorrt { +namespace plugin { + +#if IS_TRT_VERSION_GE(6000) +class StackPluginDynamic : public DynamicPluginTensorRT { + public: + explicit StackPluginDynamic(int axis, int num_stack); + StackPluginDynamic(void const* serial_data, size_t serial_length); + ~StackPluginDynamic(); + nvinfer1::IPluginV2DynamicExt* clone() const override; + nvinfer1::DimsExprs getOutputDimensions( + int outputIndex, const nvinfer1::DimsExprs* inputs, int nbInputs, + nvinfer1::IExprBuilder& exprBuilder) override; + bool supportsFormatCombination(int pos, + const nvinfer1::PluginTensorDesc* inOut, + int nbInputs, int nbOutputs) override; + void configurePlugin(const nvinfer1::DynamicPluginTensorDesc* in, + int nbInputs, + const nvinfer1::DynamicPluginTensorDesc* out, + int nbOutputs) override; + size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc* inputs, + int nbInputs, + const nvinfer1::PluginTensorDesc* outputs, + int nbOutputs) const override; + int enqueue(const nvinfer1::PluginTensorDesc* inputDesc, + const nvinfer1::PluginTensorDesc* outputDesc, + const void* const* inputs, void* const* outputs, void* workspace, + cudaStream_t stream) override; + + nvinfer1::DataType getOutputDataType(int index, + const nvinfer1::DataType* inputTypes, + int nbInputs) const override; + + const char* getPluginType() const override; + int getNbOutputs() const override; + int initialize() override; + void terminate() override; + size_t getSerializationSize() const override; + void serialize(void* buffer) const override; + void destroy() override; + + private: + int axis_; + int num_stack_; +}; + +class StackPluginDynamicCreator : public nvinfer1::IPluginCreator { + public: + StackPluginDynamicCreator(); + 
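  // Shape rule realized by StackPluginDynamic::getOutputDimensions in
  // stack_op_plugin.cu above: stacking N inputs of rank r along `axis`
  // yields rank r + 1, every dimension at or after `axis` is shifted one
  // slot to the right, and d[axis] becomes N. As an illustrative example,
  // stacking 12 tensors of shape (batch, seq, seq) at axis = 1 gives
  // (batch, 12, seq, seq), matching the (batch, num_head, seq, seq) layout
  // noted in enqueue(). StackKernel then copies
  //   input[s][lead * base_unit + i]
  // to
  //   output[lead * num_stack * base_unit + s * base_unit + i],
  // where base_unit is the product of the dimensions after `axis` and lead
  // runs over the product of the dimensions before it.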
const char* getPluginName() const override; + const char* getPluginVersion() const override; + const nvinfer1::PluginFieldCollection* getFieldNames() override; + nvinfer1::IPluginV2* createPlugin( + const char* name, const nvinfer1::PluginFieldCollection* fc) override; + nvinfer1::IPluginV2* deserializePlugin(const char* name, + const void* serial_data, + size_t serial_length) override; + void setPluginNamespace(const char* lib_namespace) override; + const char* getPluginNamespace() const override; + + private: + std::string plugin_namespace_; + nvinfer1::PluginFieldCollection field_collection_{0, nullptr}; + std::vector plugin_attributes_; +}; +REGISTER_TRT_PLUGIN_V2(StackPluginDynamicCreator); +#endif + +} // namespace plugin +} // namespace tensorrt +} // namespace inference +} // namespace paddle diff --git a/paddle/fluid/inference/tests/api/CMakeLists.txt b/paddle/fluid/inference/tests/api/CMakeLists.txt index 07af5c152b1cd42d1034ed9c5a1d8d8bc3782827..ac05b08b8f2a038234e7192f47a37b3ef3bcf461 100644 --- a/paddle/fluid/inference/tests/api/CMakeLists.txt +++ b/paddle/fluid/inference/tests/api/CMakeLists.txt @@ -132,7 +132,9 @@ if(NOT APPLE AND WITH_MKLML) set(SEQ_POOL1_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/seq_pool") download_model_and_data(${SEQ_POOL1_INSTALL_DIR} "seq_pool1_model_.tar.gz" "seq_pool1_data.txt.tar.gz") inference_analysis_api_test(test_analyzer_seq_pool1 ${SEQ_POOL1_INSTALL_DIR} analyzer_seq_pool1_tester.cc) - set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 150) + if(NOT WIN32) + set_tests_properties(test_analyzer_seq_pool1 PROPERTIES TIMEOUT 150) + endif() else() # TODO: fix this test on MACOS and OPENBLAS, the reason is that # fusion_seqexpand_concat_fc_op is not supported on MACOS and OPENBLAS @@ -192,8 +194,9 @@ download_result(${ERNIE_INSTALL_DIR} "Ernie_large_result.txt.tar.gz") inference_analysis_test(test_analyzer_ernie_large SRCS analyzer_ernie_tester.cc EXTRA_DEPS ${INFERENCE_EXTRA_DEPS} ARGS --infer_model=${ERNIE_INSTALL_DIR}/model --infer_data=${ERNIE_INSTALL_DIR}/data.txt --refer_result=${ERNIE_INSTALL_DIR}/result.txt --ernie_large=true) - -set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 150 LABELS "RUN_TYPE=NIGHTLY") +if(NOT WIN32 AND NOT APPLE) + set_tests_properties(test_analyzer_ernie_large PROPERTIES TIMEOUT 150 LABELS "RUN_TYPE=NIGHTLY") +endif() # text_classification set(TEXT_CLASSIFICATION_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/text_classification") @@ -215,7 +218,7 @@ inference_analysis_test(test_analyzer_transformer SRCS analyzer_transformer_test # ocr set(OCR_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/ocr") -if (NOT EXISTS ${OCR_INSTALL_DIR}) +if (NOT EXISTS ${OCR_INSTALL_DIR}/ocr.tar.gz) inference_download_and_uncompress(${OCR_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Focr.tar.gz") endif() inference_analysis_api_test(test_analyzer_ocr ${OCR_INSTALL_DIR} analyzer_vis_tester.cc) @@ -231,7 +234,7 @@ set_property(TEST test_analyzer_detect PROPERTY ENVIRONMENT GLOG_vmodule=analysi # mobilenet with transpose op set(MOBILENET_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/mobilenet") -if (NOT EXISTS ${MOBILENET_INSTALL_DIR}) +if (NOT EXISTS ${MOBILENET_INSTALL_DIR}/mobilenet.tar.gz) inference_download_and_uncompress(${MOBILENET_INSTALL_DIR} "http://paddlemodels.bj.bcebos.com/" "inference-vis-demos%2Fmobilenet.tar.gz") endif() inference_analysis_api_test(test_analyzer_mobilenet_transpose ${MOBILENET_INSTALL_DIR} analyzer_vis_tester.cc) @@ -395,15 +398,15 @@ 
inference_analysis_api_test(test_analyzer_bert ${BERT_INSTALL_DIR} analyzer_bert if(WITH_GPU AND TENSORRT_FOUND) set(TRT_MODEL_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/trt_models") - if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR}) + if (NOT EXISTS ${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models.tar.gz) inference_download_and_uncompress(${TRT_MODEL_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "trt_inference_test_models.tar.gz") endif() set(TEST_SPLIT_CONVERTER_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_split_op_converter_test") - if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}) + if (NOT EXISTS ${TEST_SPLIT_CONVERTER_MODEL}/split_converter.tgz) inference_download_and_uncompress(${TEST_SPLIT_CONVERTER_MODEL} ${INFERENCE_URL}/tensorrt_test "split_converter.tgz") endif() set(TEST_INSTANCE_NORM_MODEL "${TRT_MODEL_INSTALL_DIR}/trt_instance_norm_test") - if (NOT EXISTS ${TEST_INSTANCE_NORM_MODEL}) + if (NOT EXISTS ${TEST_INSTANCE_NORM_MODEL}/instance_norm.tgz) inference_download_and_uncompress(${TEST_INSTANCE_NORM_MODEL} ${INFERENCE_URL}/tensorrt_test "instance_norm.tgz") endif() inference_analysis_test(trt_mobilenet_test SRCS trt_mobilenet_test.cc @@ -432,7 +435,7 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}/trt_inference_test_models) set(TRT_MODEL_QUANT_RESNET_DIR "${INFERENCE_DEMO_INSTALL_DIR}/small_quant_model") - if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR}) + if (NOT EXISTS ${TRT_MODEL_QUANT_RESNET_DIR}/small_quant_model.tgz) inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "small_quant_model.tgz") endif() inference_analysis_test(trt_quant_int8_test SRCS trt_quant_int8_test.cc @@ -440,7 +443,7 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TRT_MODEL_QUANT_RESNET_DIR}) set(TRT_MODEL_QUANT_YOLOV3_DIR "${INFERENCE_DEMO_INSTALL_DIR}/yolov3_r50_quant_aware") - if (NOT EXISTS ${TRT_MODEL_QUANT_YOLOV3_DIR}) + if (NOT EXISTS ${TRT_MODEL_QUANT_YOLOV3_DIR}/yolov3_r50_quant_aware.tgz) inference_download_and_uncompress(${INFERENCE_DEMO_INSTALL_DIR} ${INFERENCE_URL}/tensorrt_test "yolov3_r50_quant_aware.tgz") endif() inference_analysis_test(trt_quant_int8_yolov3_r50_test SRCS trt_quant_int8_yolov3_r50_test.cc @@ -448,12 +451,12 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TRT_MODEL_QUANT_YOLOV3_DIR}) set(TEST_TRT_DYNAMIC_MODEL2 "${TRT_MODEL_INSTALL_DIR}/complex_model_dynamic") - if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL2}) + if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL2}/complex_model_dynamic2.tar.gz) inference_download_and_uncompress(${TEST_TRT_DYNAMIC_MODEL2} ${INFERENCE_URL}/tensorrt_test "complex_model_dynamic2.tar.gz") endif() set(TEST_TRT_DYNAMIC_MODEL "${TRT_MODEL_INSTALL_DIR}/conv_bn_swish_split_gelu") - if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL}) + if (NOT EXISTS ${TEST_TRT_DYNAMIC_MODEL}/conv_bn_swish_split_gelu.tar.gz) inference_download_and_uncompress(${TEST_TRT_DYNAMIC_MODEL} ${INFERENCE_URL}/tensorrt_test "conv_bn_swish_split_gelu.tar.gz") endif() inference_analysis_test(trt_dynamic_shape_test SRCS trt_dynamic_shape_test.cc @@ -461,7 +464,7 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TRT_MODEL_INSTALL_DIR}) set(TEST_TRT_ERNIE_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test") - if (NOT EXISTS ${TEST_TRT_ERNIE_MODEL}) + if (NOT EXISTS ${TEST_TRT_ERNIE_MODEL}/ernie_model_4.tar.gz) inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4.tar.gz") endif() @@ -470,7 +473,7 @@ if(WITH_GPU AND TENSORRT_FOUND) ARGS --infer_model=${TEST_TRT_ERNIE_MODEL}/ernie_model_4) 
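# The EXISTS guards in this block now key off the downloaded archive file
# (e.g. ${TEST_TRT_ERNIE_MODEL}/ernie_model_4.tar.gz) instead of the install
# directory, so the download step re-runs whenever the archive is missing,
# even if the directory itself already exists.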
set(TEST_TRT_ERNIE_UNSER_MODEL "${TRT_MODEL_INSTALL_DIR}/ernie_test/ernie_model_4_unserialized/") - if (NOT EXISTS ${TEST_TRT_ERNIE_UNSER_MODEL}) + if (NOT EXISTS ${TEST_TRT_ERNIE_UNSER_MODEL}/ernie_model_4_unserialized.tgz) inference_download_and_uncompress(${TEST_TRT_ERNIE_MODEL} ${INFERENCE_URL}/tensorrt_test "ernie_model_4_unserialized.tgz") endif() diff --git a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc index da0c93d21b7852e06b6805230078540063c2b243..c60e0a25f28c01c453276a8ef04eb79b35b7dda2 100644 --- a/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_capi_gpu_tester.cc @@ -54,9 +54,6 @@ TEST(PD_AnalysisConfig, use_gpu) { PD_SwitchIrOptim(config, true); bool ir_optim = PD_IrOptim(config); CHECK(ir_optim) << "NO"; - PD_EnableMkldnnBfloat16(config); - bool bfloat16_enable = PD_MkldnnBfloat16Enabled(config); - CHECK(!bfloat16_enable) << "NO"; PD_EnableTensorRtEngine(config, 1 << 20, 1, 3, Precision::kFloat32, false, false); bool trt_enable = PD_TensorrtEngineEnabled(config); diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc index 7e5dfa2424dbca4fb3a8a08e3d7fa7fbc3060d3d..524e08891f4e90d8a322822e26d75689526d30f5 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_deserialize_test.cc @@ -90,7 +90,6 @@ void trt_ernie(bool with_fp16, std::vector result) { config.SwitchUseFeedFetchOps(false); - int head_number = 12; int batch = 1; int min_seq_len = 1; int max_seq_len = 128; @@ -104,17 +103,17 @@ void trt_ernie(bool with_fp16, std::vector result) { {"read_file_0.tmp_0", min_shape}, {"read_file_0.tmp_1", min_shape}, {"read_file_0.tmp_2", min_shape}, - {"stack_0.tmp_0", {batch, head_number, min_seq_len, min_seq_len}}}; + {"matmul_0.tmp_0", {batch, min_seq_len, min_seq_len}}}; std::map> max_input_shape = { {"read_file_0.tmp_0", max_shape}, {"read_file_0.tmp_1", max_shape}, {"read_file_0.tmp_2", max_shape}, - {"stack_0.tmp_0", {batch, head_number, max_seq_len, max_seq_len}}}; + {"matmul_0.tmp_0", {batch, max_seq_len, max_seq_len}}}; std::map> opt_input_shape = { {"read_file_0.tmp_0", opt_shape}, {"read_file_0.tmp_1", opt_shape}, {"read_file_0.tmp_2", opt_shape}, - {"stack_0.tmp_0", {batch, head_number, opt_seq_len, opt_seq_len}}}; + {"matmul_0.tmp_0", {batch, opt_seq_len, opt_seq_len}}}; auto precision = AnalysisConfig::Precision::kFloat32; if (with_fp16) { diff --git a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc index c99ebcdcb5f319f73b7fd931d13f27684db39cad..17fedc3d3b8bb8451fac76f6c7dec4ac057fd1d2 100644 --- a/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc +++ b/paddle/fluid/inference/tests/api/trt_dynamic_shape_ernie_test.cc @@ -90,7 +90,6 @@ void trt_ernie(bool with_fp16, std::vector result) { config.SwitchUseFeedFetchOps(false); - int head_number = 12; int batch = 1; int min_seq_len = 1; int max_seq_len = 128; @@ -104,17 +103,17 @@ void trt_ernie(bool with_fp16, std::vector result) { {"read_file_0.tmp_0", min_shape}, {"read_file_0.tmp_1", min_shape}, {"read_file_0.tmp_2", min_shape}, - {"stack_0.tmp_0", {batch, head_number, min_seq_len, min_seq_len}}}; + {"matmul_0.tmp_0", {batch, min_seq_len, min_seq_len}}}; std::map> 
max_input_shape = { {"read_file_0.tmp_0", max_shape}, {"read_file_0.tmp_1", max_shape}, {"read_file_0.tmp_2", max_shape}, - {"stack_0.tmp_0", {batch, head_number, max_seq_len, max_seq_len}}}; + {"matmul_0.tmp_0", {batch, max_seq_len, max_seq_len}}}; std::map> opt_input_shape = { {"read_file_0.tmp_0", opt_shape}, {"read_file_0.tmp_1", opt_shape}, {"read_file_0.tmp_2", opt_shape}, - {"stack_0.tmp_0", {batch, head_number, opt_seq_len, opt_seq_len}}}; + {"matmul_0.tmp_0", {batch, opt_seq_len, opt_seq_len}}}; auto precision = AnalysisConfig::Precision::kFloat32; if (with_fp16) { diff --git a/paddle/fluid/inference/tests/test.cmake b/paddle/fluid/inference/tests/test.cmake index b9f979f96d4b106642795151fb8e34b025b2caef..8bc10f2147fa29102b242ce22e78a88453d6cee4 100644 --- a/paddle/fluid/inference/tests/test.cmake +++ b/paddle/fluid/inference/tests/test.cmake @@ -45,7 +45,7 @@ function(inference_download_and_uncompress INSTALL_DIR URL FILENAME) endfunction() set(WORD2VEC_INSTALL_DIR "${INFERENCE_DEMO_INSTALL_DIR}/word2vec") -if(NOT EXISTS ${WORD2VEC_INSTALL_DIR}) +if(NOT EXISTS ${WORD2VEC_INSTALL_DIR}/word2vec.inference.model.tar.gz) inference_download_and_uncompress(${WORD2VEC_INSTALL_DIR} ${INFERENCE_URL} "word2vec.inference.model.tar.gz") endif() set(WORD2VEC_MODEL_DIR "${WORD2VEC_INSTALL_DIR}/word2vec.inference.model") diff --git a/paddle/fluid/operators/affine_grid_op.cu b/paddle/fluid/operators/affine_grid_op.cu index 7aaaa0002c5ab31af72c75e69f5a283c09633ba4..58b56bdcf5614ed9183ce3bf11c1767f92650d20 100644 --- a/paddle/fluid/operators/affine_grid_op.cu +++ b/paddle/fluid/operators/affine_grid_op.cu @@ -62,11 +62,11 @@ __global__ void affine_grid_kernel(const int count, int n, int out_h, int out_w, int theta_offset = n * 6; // 2 * 3; // affine from (h_coor, w_coor) to (x, y) - output[index * 2] = theta[theta_offset] * h_coor + - theta[theta_offset + 1] * w_coor + + output[index * 2] = theta[theta_offset] * w_coor + + theta[theta_offset + 1] * h_coor + theta[theta_offset + 2]; - output[index * 2 + 1] = theta[theta_offset + 3] * h_coor + - theta[theta_offset + 4] * w_coor + + output[index * 2 + 1] = theta[theta_offset + 3] * w_coor + + theta[theta_offset + 4] * h_coor + theta[theta_offset + 5]; } } @@ -86,13 +86,13 @@ __global__ void affine_grid_grad_kernel(const int count, int n, int out_h, int theta_offset = n * 6; // 2 * 3; T out_grad_x = out_grad[index * 2]; - platform::CudaAtomicAdd(theta_grad + theta_offset, out_grad_x * h_coor); - platform::CudaAtomicAdd(theta_grad + theta_offset + 1, out_grad_x * w_coor); + platform::CudaAtomicAdd(theta_grad + theta_offset, out_grad_x * w_coor); + platform::CudaAtomicAdd(theta_grad + theta_offset + 1, out_grad_x * h_coor); platform::CudaAtomicAdd(theta_grad + theta_offset + 2, out_grad_x); T out_grad_y = out_grad[index * 2 + 1]; - platform::CudaAtomicAdd(theta_grad + theta_offset + 3, out_grad_y * h_coor); - platform::CudaAtomicAdd(theta_grad + theta_offset + 4, out_grad_y * w_coor); + platform::CudaAtomicAdd(theta_grad + theta_offset + 3, out_grad_y * w_coor); + platform::CudaAtomicAdd(theta_grad + theta_offset + 4, out_grad_y * h_coor); platform::CudaAtomicAdd(theta_grad + theta_offset + 5, out_grad_y); } } diff --git a/paddle/fluid/operators/arg_max_op.cc b/paddle/fluid/operators/arg_max_op.cc index fd7fa17ac9ae5e540176bb583cf87fa3d00d2945..a82134921ef64f89151eb9c521ea3cbb6f83ee7b 100644 --- a/paddle/fluid/operators/arg_max_op.cc +++ b/paddle/fluid/operators/arg_max_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 
or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/arg_min_max_op_base.h" REGISTER_OPERATOR( @@ -31,3 +32,20 @@ REGISTER_OP_CPU_KERNEL( int16_t>, paddle::operators::ArgMaxKernel); +REGISTER_OP_VERSION(arg_max) + .AddCheckpoint( + R"ROC( + Upgrade argmax add a new attribute [flatten] and modify the attribute of dtype)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewAttr("flatten", + "In order to compute the argmax over the flattened array " + "when the " + "argument `axis` in python API is None.", + false) + .ModifyAttr( + "dtype", + "change the default value of dtype, the older version " + "is -1, means return the int64 indices." + "The new version is 3, return the int64 indices directly." + "And supporting the dtype of -1 in new version.", + 3)); diff --git a/paddle/fluid/operators/arg_min_max_op_base.h b/paddle/fluid/operators/arg_min_max_op_base.h index ae3637f6f99783d70bd57a3935a979b0387692de..c296ddcfbef703e8484b6ea0b7f96f037e415186 100644 --- a/paddle/fluid/operators/arg_min_max_op_base.h +++ b/paddle/fluid/operators/arg_min_max_op_base.h @@ -70,6 +70,8 @@ struct VisitDataArgMinMaxFunctor { auto axis = ctx.Attr("axis"); auto keepdims = ctx.Attr("keepdims"); const bool& flatten = ctx.Attr("flatten"); + // paddle do not have the scalar tensor, just return the shape [1] tensor + if (flatten) keepdims = true; // if flatten, will construct the new dims for the cacluate framework::DDim x_dims; @@ -164,15 +166,42 @@ class ArgMinMaxOp : public framework::OperatorWithKernel { platform::errors::InvalidArgument( "'axis'(%d) must be less than Rank(X)(%d).", axis, x_dims.size())); + const int& dtype = ctx->Attrs().Get("dtype"); + PADDLE_ENFORCE_EQ( + (dtype < 0 || dtype == 2 || dtype == 3), true, + platform::errors::InvalidArgument( + "The attribute of dtype in argmin/argmax must be [%s] or [%s], but " + "received [%s]", + paddle::framework::DataTypeToString( + framework::proto::VarType::INT32), + paddle::framework::DataTypeToString( + framework::proto::VarType::INT64), + paddle::framework::DataTypeToString( + static_cast(dtype)))); + + auto x_rank = x_dims.size(); + if (axis < 0) axis += x_rank; + if (ctx->IsRuntime()) { + if (dtype == framework::proto::VarType::INT32) { + int64_t all_element_num = 0; + if (flatten) { + all_element_num = framework::product(x_dims); + + } else { + all_element_num = x_dims[axis]; + } + PADDLE_ENFORCE_LE( + all_element_num, INT_MAX, + "The element num of the argmin/argmax input at axis is " + "%d, is larger than int32 maximum value:%d, you must " + "set the dtype of argmin/argmax to 'int64'.", + all_element_num, INT_MAX); + } + } std::vector vec; if (flatten) { - // if is flatten, will return the only on element - if (keepdims) { - vec.emplace_back(static_cast(1)); - } + vec.emplace_back(static_cast(1)); } else { - auto x_rank = x_dims.size(); - if (axis < 0) axis += x_rank; for (int64_t i = 0; i < axis; i++) vec.emplace_back(x_dims[i]); if (keepdims) { vec.emplace_back(static_cast(1)); @@ -194,10 +223,14 @@ class BaseArgMinMaxOpMaker : public framework::OpProtoAndCheckerMaker { AddOutput("Out", "Output tensor."); AddAttr("axis", "The axis in which to compute the arg indics."); AddAttr("keepdims", "Keep the dim that to reduce.").SetDefault(false); - AddAttr("dtype", "Keep the dim that to reduce.").SetDefault(-1); AddAttr("flatten", "Flatten the input value, and search the min or max indices") 
.SetDefault(false); + AddAttr("dtype", + "(int, 3), the dtype of indices, the indices dtype must be " + "int32, int64." + "default dtype is int64, and proto value is 3.") + .SetDefault(3); AddComment(string::Sprintf(R"DOC( %s Operator. diff --git a/paddle/fluid/operators/arg_min_op.cc b/paddle/fluid/operators/arg_min_op.cc index 74fc3292746d26a983fa81ed8cac67b30e23d476..23ed7d727c536225a98a1ea9e6e3af723b4352c3 100644 --- a/paddle/fluid/operators/arg_min_op.cc +++ b/paddle/fluid/operators/arg_min_op.cc @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/operators/arg_min_max_op_base.h" REGISTER_OPERATOR( @@ -31,3 +32,20 @@ REGISTER_OP_CPU_KERNEL( int16_t>, paddle::operators::ArgMinKernel); +REGISTER_OP_VERSION(arg_min) + .AddCheckpoint( + R"ROC( + Upgrade argmin add a new attribute [flatten] and modify the attribute of dtype)ROC", + paddle::framework::compatible::OpVersionDesc() + .NewAttr("flatten", + "In order to compute the argmin over the flattened array " + "when the " + "argument `axis` in python API is None.", + false) + .ModifyAttr( + "dtype", + "change the default value of dtype, the older version " + "is -1, means return the int64 indices." + "The new version is 3, return the int64 indices directly." + "And supporting the dtype of -1 in new version.", + 3)); diff --git a/paddle/fluid/operators/bernoulli_op.cu b/paddle/fluid/operators/bernoulli_op.cu index d0837071d456068f64ebc74b115f1a7904eba41c..6565f5a9a2176972e9e5085c6646097e8349f259 100644 --- a/paddle/fluid/operators/bernoulli_op.cu +++ b/paddle/fluid/operators/bernoulli_op.cu @@ -16,7 +16,6 @@ limitations under the License. */ #include #include -#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/bernoulli_op.h" @@ -31,6 +30,10 @@ struct BernoulliCudaFunctor { __host__ __device__ BernoulliCudaFunctor(int seed) : seed_(seed) {} __host__ __device__ T operator()(const unsigned int n, const T p) const { + // NOTE(zhiqiu): currently, PADDLE_ENFORCE in cuda kernel may print several + // lines of error messages if, and it should be refined. 
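    // The PADDLE_ENFORCE below only validates that p lies in [0, 1]; the
    // sampling itself seeds a thrust::minstd_rand with seed_, draws
    // u ~ uniform(0, 1) from the distribution declared afterwards (using the
    // element index n to decorrelate draws), and the functor returns 1 when
    // u < p and 0 otherwise, i.e. a Bernoulli(p) sample per element.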
+ PADDLE_ENFORCE(p >= 0.0 && p <= 1.0, + "The probability should be >=0 and <= 1, but got %f", p); thrust::minstd_rand rng; rng.seed(seed_); thrust::uniform_real_distribution dist(0.0, 1.0); diff --git a/paddle/fluid/operators/bernoulli_op.h b/paddle/fluid/operators/bernoulli_op.h index 06a83ada17bb926d6f7d4eef10750986d00f048c..40f285d11f194057d950f45798bea07439398ab0 100644 --- a/paddle/fluid/operators/bernoulli_op.h +++ b/paddle/fluid/operators/bernoulli_op.h @@ -25,10 +25,12 @@ namespace operators { template inline HOSTDEVICE T BernoulliFunctor(T p, T rand) { - PADDLE_ENFORCE_LE(p, 1, platform::errors::OutOfRange( - "The probability should be <= 1, but got %f", p)); - PADDLE_ENFORCE_GE(p, 0, platform::errors::OutOfRange( - "The probability should be >= 1, but got %f", p)); + PADDLE_ENFORCE_LE(p, 1.0, + platform::errors::OutOfRange( + "The probability should be <= 1, but got %f", p)); + PADDLE_ENFORCE_GE(p, 0.0, + platform::errors::OutOfRange( + "The probability should be >= 0, but got %f", p)); return static_cast(rand < p); } diff --git a/paddle/fluid/operators/clip_op.h b/paddle/fluid/operators/clip_op.h index 03abfe7eb703b021dac2261dcd9c87d440b04001..68f5d5460efd16a79d6e1553c2fb78da31fc704a 100644 --- a/paddle/fluid/operators/clip_op.h +++ b/paddle/fluid/operators/clip_op.h @@ -66,7 +66,7 @@ template class ClipKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto max = context.Attr("max"); + auto max = static_cast(context.Attr("max")); Tensor max_cpu; if (context.HasInput("Max")) { auto* max_t = context.Input("Max"); @@ -77,8 +77,9 @@ class ClipKernel : public framework::OpKernel { } max = max_data[0]; } + max = static_cast(max); - auto min = context.Attr("min"); + auto min = context.Attr("min"); Tensor min_cpu; if (context.HasInput("Min")) { auto* min_t = context.Input("Min"); @@ -141,7 +142,7 @@ template class ClipGradKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto max = context.Attr("max"); + auto max = static_cast(context.Attr("max")); Tensor max_cpu; if (context.HasInput("Max")) { auto* max_t = context.Input("Max"); @@ -152,8 +153,9 @@ class ClipGradKernel : public framework::OpKernel { } max = max_data[0]; } + max = static_cast(max); - auto min = context.Attr("min"); + auto min = context.Attr("min"); Tensor min_cpu; if (context.HasInput("Min")) { auto* min_t = context.Input("Min"); @@ -164,6 +166,7 @@ class ClipGradKernel : public framework::OpKernel { } min = min_data[0]; } + min = static_cast(min); auto* d_out = context.Input(framework::GradVarName("Out")); diff --git a/paddle/fluid/operators/distributed/variable_response.cc b/paddle/fluid/operators/distributed/variable_response.cc index c9c42e0938d51991c53b74ac6ad59c350f4a3ced..de77121ee3990366771723e3c43e53362c832ef7 100644 --- a/paddle/fluid/operators/distributed/variable_response.cc +++ b/paddle/fluid/operators/distributed/variable_response.cc @@ -62,6 +62,34 @@ bool VariableResponse::ReadRaw(::google::protobuf::io::CodedInputStream* input, gpu_dev_ctx.Wait(); #else PADDLE_THROW("Unexpected branch"); +#endif + return true; + } else if (platform::is_xpu_place(place)) { +#ifdef PADDLE_WITH_XPU + auto& xpu_dev_ctx = static_cast(dev_ctx); + platform::CPUPlace cpu; + char* p = reinterpret_cast(dest); + while (total_written < length) { + if (!input->GetDirectBufferPointer(&data, &size_to_write)) { + return false; + } + + if (total_written + size_to_write > length) { + 
size_to_write = length - total_written; + } + + memory::Copy(BOOST_GET_CONST(platform::XPUPlace, place), + reinterpret_cast(p), cpu, data, size_to_write); + p += size_to_write; + total_written += size_to_write; + input->Skip(size_to_write); + } + xpu_dev_ctx.Wait(); +#else + PADDLE_ENFORCE_NOT_NULL( + nullptr, + platform::errors::Unimplemented( + "Not supported XPU, please compile with option WITH_XPU=ON.")); #endif return true; } diff --git a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc index 8c093d12585981ee681ae13f0d2e493197c6b9b3..6dfa2670c140fcfb4c409c0f9e9cef49c02a7064 100644 --- a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc +++ b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.cc @@ -25,25 +25,32 @@ class DistributedLookupTableOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext *ctx) const override { - PADDLE_ENFORCE(ctx->HasInputs("Ids"), - "Input(Ids) of LookupTableOp should not be null."); - PADDLE_ENFORCE(ctx->HasInput("W"), - "Input(W) of LookupTableOp should not be null."); - PADDLE_ENFORCE(ctx->HasOutputs("Outputs"), - "Output(Outs) of LookupTableOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasInputs("Ids"), true, + platform::errors::InvalidArgument( + "Input(Ids) of LookupTableOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasInput("W"), true, + platform::errors::InvalidArgument( + "Input(W) of LookupTableOp should not be null.")); + PADDLE_ENFORCE_EQ(ctx->HasOutputs("Outputs"), true, + platform::errors::InvalidArgument( + "Output(Outs) of LookupTableOp should not be null.")); auto ids_dims = ctx->GetInputsDim("Ids"); auto table_dims = ctx->GetInputDim("W"); - PADDLE_ENFORCE_EQ(table_dims.size(), 2, - "Only 2 dimensions of the 'Embedding' is supported."); + PADDLE_ENFORCE_EQ( + table_dims.size(), 2, + platform::errors::InvalidArgument( + "Only 2 dimensions of the 'Embedding' is supported.")); for (auto &ids_dim : ids_dims) { PADDLE_ENFORCE_EQ(ids_dim.size(), 2, - "The dimension of the 'Ids' tensor must be 2."); + platform::errors::InvalidArgument( + "The dimension of the 'Ids' tensor must be 2.")); } auto endpoints = ctx->Attrs().Get>("endpoints"); + // for fluid.embedding auto lookup_table_version = ctx->Attrs().Get("lookup_table_version"); diff --git a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h index a71451c78a870b71c05b41bdcfb34a85b3e2213b..6387120bc87fc94f40574a3ab7f0aabc98f41e95 100644 --- a/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h +++ b/paddle/fluid/operators/distributed_ops/distributed_lookup_table_op.h @@ -35,9 +35,30 @@ class DistributedLookupTableKernel : public framework::OpKernel { auto endpoints = context.Attr>("endpoints"); auto is_distributed = context.Attr("is_distributed"); + auto lookup_table_version = + context.Attr("lookup_table_version"); + operators::distributed::prefetchs(id_names, out_names, embedding_name, is_distributed, lookup_tables, endpoints, context, context.scope()); + + if (lookup_table_version == "lookup_table_v2") { + auto &scope = context.scope(); + auto emb_dim = + scope.FindVar(embedding_name)->Get().dims()[1]; + + for (size_t i = 0; i < id_names.size(); ++i) { + auto *id_var = scope.FindVar(id_names[i]); + auto *out_var = scope.FindVar(out_names[i]); + auto *id_tensor = 
id_var->GetMutable(); + auto *out_tensor = out_var->GetMutable(); + + auto id_dims = id_tensor->dims(); + out_tensor->Resize(framework::make_ddim( + {static_cast(id_dims[0]), static_cast(id_dims[1]), + static_cast(emb_dim)})); + } + } } }; diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu index 4d5e4c4f600314d307125f9b2031026b6aa94f10..49ad67bbca353acc4a79c9e8912d7ae5a70c0021 100644 --- a/paddle/fluid/operators/dropout_op.cu +++ b/paddle/fluid/operators/dropout_op.cu @@ -96,6 +96,42 @@ __global__ void RandomGeneratorWithSeed(const size_t n, const int* seed, } } +template +__global__ void RandomGeneratorWithGenerator(const size_t n, uint64_t seed, + const float dropout_prob, + const T* src, MaskType* mask_data, + T* dst, bool is_upscale_in_train, + uint64_t increment) { + curandStatePhilox4_32_10_t state; + int idx = blockDim.x * blockIdx.x + threadIdx.x; + int step_size = 0; + + MaskType mask; + T dest; + for (; idx < n; idx += blockDim.x * gridDim.x) { + T s = src[idx]; + if (step_size == 0) { + curand_init(seed, idx, increment, &state); + step_size = blockDim.x * gridDim.x; + } else { + curand_init(seed, idx, increment, &state); + } + if (curand_uniform(&state) < dropout_prob) { + mask = 0; + dest = 0; + } else { + mask = 1; + if (is_upscale_in_train) { + dest = s / static_cast(1.0f - dropout_prob); + } else { + dest = s; + } + } + mask_data[idx] = mask; + dst[idx] = dest; + } +} + // It seems that Eigen::Tensor::setRandom in GPU will SEGFAULT. // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. @@ -150,6 +186,17 @@ class GPUDropoutKernel : public framework::OpKernel { context.Attr("fix_seed") ? context.Attr("seed") : rnd(); } + int device_id = BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()) + .GetDeviceId(); + auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); + if (gen_cuda->GetIsInitPy() && (!context.Attr("fix_seed"))) { + auto seed_offset = gen_cuda->IncrementOffset(1); + RandomGeneratorWithGenerator<<>>( + size, seed_offset.first, dropout_prob, x_data, mask_data, y_data, + upscale_in_train, seed_offset.second); + return; + } + RandomGenerator<<>>( size, seed_data, dropout_prob, x_data, mask_data, y_data, upscale_in_train); diff --git a/paddle/fluid/operators/dropout_op.h b/paddle/fluid/operators/dropout_op.h index 9d9eb4a82a075f27764a73d0e976dbf3f7181cb1..161c4282ec277a19c19921267eaa4cb46b859900 100644 --- a/paddle/fluid/operators/dropout_op.h +++ b/paddle/fluid/operators/dropout_op.h @@ -29,6 +29,10 @@ template using EigenMatrix = framework::EigenMatrix; +template +using EigenVector = framework::EigenVector; + template class CPUDropoutKernel : public framework::OpKernel { public: @@ -116,9 +120,9 @@ class DropoutGradKernel : public framework::OpKernel { auto* mask = context.Input("Mask"); grad_x->mutable_data(context.GetPlace()); - auto M = EigenMatrix::Reshape(*mask, 1); - auto dX = EigenMatrix::Reshape(*grad_x, 1); - auto dY = EigenMatrix::Reshape(*grad_y, 1); + auto M = EigenVector::Flatten(*mask); + auto dX = EigenVector::Flatten(*grad_x); + auto dY = EigenVector::Flatten(*grad_y); auto& place = *context.template device_context().eigen_device(); diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.h b/paddle/fluid/operators/elementwise/elementwise_mod_op.h index 47bd6af0b95ace2b9b753e38cfc5f191bc1bb942..87e940e2ed6319c4f2957cd846735adb210cd23d 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.h +++ 
b/paddle/fluid/operators/elementwise/elementwise_mod_op.h @@ -31,6 +31,15 @@ struct ModFunctor { } }; +template +struct InverseModFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { + T res = b % a; + if ((res != 0) && ((res < 0) != (a < 0))) res += a; + return res; + } +}; + template struct ModFunctorFP { inline HOSTDEVICE T operator()(T a, T b) const { @@ -40,13 +49,29 @@ struct ModFunctorFP { } }; +template +struct InverseModFunctorFP { + inline HOSTDEVICE T operator()(T a, T b) const { + T res = fmod(b, a); + if ((res != 0) && ((a < 0) != (res < 0))) res += a; + return res; + } +}; + template void elementwise_mod(const framework::ExecutionContext &ctx, const framework::Tensor *x, const framework::Tensor *y, framework::Tensor *z) { int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - ModFunctor(), z); + auto x_dims = x->dims(); + auto y_dims = y->dims(); + if (x_dims.size() >= y_dims.size()) { + ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, + ModFunctor(), z); + } else { + ElementwiseComputeEx, DeviceContext, T>( + ctx, x, y, axis, InverseModFunctor(), z); + } } template @@ -54,8 +79,15 @@ void elementwise_mod_fp(const framework::ExecutionContext &ctx, const framework::Tensor *x, const framework::Tensor *y, framework::Tensor *z) { int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - ModFunctorFP(), z); + auto x_dims = x->dims(); + auto y_dims = y->dims(); + if (x_dims.size() >= y_dims.size()) { + ElementwiseComputeEx, DeviceContext, T>( + ctx, x, y, axis, ModFunctorFP(), z); + } else { + ElementwiseComputeEx, DeviceContext, T>( + ctx, x, y, axis, InverseModFunctorFP(), z); + } } template diff --git a/paddle/fluid/operators/gaussian_random_op.cu b/paddle/fluid/operators/gaussian_random_op.cu index c144481f8dedc9317f7657a22ce82e56022d5b89..69c8b60040651179784cd6b77c31c66e892231be 100644 --- a/paddle/fluid/operators/gaussian_random_op.cu +++ b/paddle/fluid/operators/gaussian_random_op.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/operators/fill_constant_op.h" @@ -24,15 +25,20 @@ template struct GaussianGenerator { T mean_, std_; unsigned int seed_; + unsigned int offset_ = 0; __host__ __device__ GaussianGenerator(T mean, T std, int seed) : mean_(mean), std_(std), seed_(seed) {} + __host__ __device__ GaussianGenerator(T mean, T std, int seed, int offset) + : mean_(mean), std_(std), seed_(seed), offset_(offset) {} + __host__ __device__ T operator()(const unsigned int n) const { thrust::minstd_rand rng; rng.seed(seed_); thrust::normal_distribution dist(mean_, std_); - rng.discard(n); + unsigned int new_n = n + offset_; + rng.discard(new_n); return dist(rng); } }; @@ -43,9 +49,11 @@ class GPUGaussianRandomKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& context) const override { auto* tensor = context.Output("Out"); unsigned int seed = static_cast(context.Attr("seed")); + bool seed_flag = false; if (seed == 0) { std::random_device rd; seed = rd(); + seed_flag = true; } T mean = static_cast(context.Attr("mean")); T std = static_cast(context.Attr("std")); @@ -56,9 +64,23 @@ class GPUGaussianRandomKernel : public framework::OpKernel { T* data = tensor->mutable_data(context.GetPlace()); int64_t size = tensor->numel(); - thrust::transform(index_sequence_begin, index_sequence_begin + size, - thrust::device_ptr(data), - GaussianGenerator(mean, std, seed)); + + int device_id = + BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId(); + auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); + + if (gen_cuda->GetIsInitPy() && seed_flag) { + auto seed_offset = gen_cuda->IncrementOffset(1); + int gen_offset = size * seed_offset.second; + thrust::transform( + index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + GaussianGenerator(mean, std, seed_offset.first, gen_offset)); + } else { + thrust::transform(index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + GaussianGenerator(mean, std, seed)); + } } }; @@ -69,17 +91,33 @@ class GPUGaussianRandomBatchSizeLikeKernel : public framework::OpKernel { auto* tensor = context.Output("Out"); T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); + bool seed_flag = false; if (seed == 0) { std::random_device rd; seed = rd(); + seed_flag = true; } T mean = static_cast(context.Attr("mean")); T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); int64_t size = tensor->numel(); - thrust::transform(index_sequence_begin, index_sequence_begin + size, - thrust::device_ptr(data), - GaussianGenerator(mean, std, seed)); + + int device_id = + BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId(); + auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); + + if (gen_cuda->GetIsInitPy() && seed_flag) { + auto seed_offset = gen_cuda->IncrementOffset(1); + int gen_offset = size * seed_offset.second; + thrust::transform(index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + GaussianGenerator(mean, std, seed_offset.first, + seed_offset.second)); + } else { + thrust::transform(index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + GaussianGenerator(mean, std, seed)); + } } }; } // namespace operators diff --git a/paddle/fluid/operators/linspace_op.cu 
b/paddle/fluid/operators/linspace_op.cu index 8aca892a81d41b1e0a9f7f9c14169c2817ae9452..793253b6b8894de8d89b301921383ebfd53d66fc 100644 --- a/paddle/fluid/operators/linspace_op.cu +++ b/paddle/fluid/operators/linspace_op.cu @@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/linspace_op.h" #include "paddle/fluid/platform/cuda_primitives.h" @@ -19,6 +20,8 @@ limitations under the License. */ namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template __global__ void LinspaceKernel(T start, double step, int64_t size, T* out) { CUDA_KERNEL_LOOP(index, size) { @@ -35,15 +38,27 @@ template class CUDALinspaceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - auto* start_t = context.Input("Start"); - auto* stop_t = context.Input("Stop"); + auto* pre_start = context.Input("Start"); + auto* pre_stop = context.Input("Stop"); auto* num_t = context.Input("Num"); auto* out = context.Output("Out"); + auto dtype = static_cast( + context.Attr("dtype")); + + Tensor start_t; + Tensor stop_t; + auto start_dtype = + framework::OpKernelType(pre_start->type(), context.GetPlace()); + auto stop_dtype = + framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); + framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); + framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); framework::Tensor n; - framework::TensorCopy(*start_t, platform::CPUPlace(), &n); + framework::TensorCopy(start_t, platform::CPUPlace(), &n); T start = n.data()[0]; - framework::TensorCopy(*stop_t, platform::CPUPlace(), &n); + framework::TensorCopy(stop_t, platform::CPUPlace(), &n); T stop = n.data()[0]; framework::TensorCopy(*num_t, platform::CPUPlace(), &n); int32_t num = n.data()[0]; diff --git a/paddle/fluid/operators/linspace_op.h b/paddle/fluid/operators/linspace_op.h index 9fb4960375ed7be60598d558c65310bd4a4b84bc..898f611f864dc8bfac2ba7e41b91f5f5bbe524ab 100644 --- a/paddle/fluid/operators/linspace_op.h +++ b/paddle/fluid/operators/linspace_op.h @@ -14,20 +14,38 @@ limitations under the License. 
*/ #pragma once #include +#include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" namespace paddle { namespace operators { +using Tensor = framework::Tensor; + template class CPULinspaceKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& context) const override { - T start = context.Input("Start")->data()[0]; - T stop = context.Input("Stop")->data()[0]; + auto* pre_start = context.Input("Start"); + auto* pre_stop = context.Input("Stop"); int32_t num = context.Input("Num")->data()[0]; auto* out = context.Output("Out"); + auto dtype = static_cast( + context.Attr("dtype")); + + Tensor start_t; + Tensor stop_t; + auto start_dtype = + framework::OpKernelType(pre_start->type(), context.GetPlace()); + auto stop_dtype = + framework::OpKernelType(pre_stop->type(), context.GetPlace()); + auto out_dtype = framework::OpKernelType(dtype, context.GetPlace()); + framework::TransDataType(start_dtype, out_dtype, *pre_start, &start_t); + framework::TransDataType(stop_dtype, out_dtype, *pre_stop, &stop_t); + + T start = start_t.data()[0]; + T stop = stop_t.data()[0]; PADDLE_ENFORCE(num > 0, "The num of linspace op should be larger than 0."); out->Resize(framework::make_ddim({num})); diff --git a/paddle/fluid/operators/load_combine_op.h b/paddle/fluid/operators/load_combine_op.h index 1b4db94b298c53382ee4c657e24b1b6fe6b7f62b..589df8821b3e7fc034df7504fd8d4ce802cc4ecb 100644 --- a/paddle/fluid/operators/load_combine_op.h +++ b/paddle/fluid/operators/load_combine_op.h @@ -70,6 +70,7 @@ class LoadCombineOpKernel : public framework::OpKernel { auto out_vars = context.MultiOutputVar("Out"); for (size_t i = 0; i < out_var_names.size(); i++) { + VLOG(4) << "loading tensor: " << out_var_names[i]; PADDLE_ENFORCE_NOT_NULL( out_vars[i], platform::errors::InvalidArgument( "The variable %s to be loaded cannot be found.", diff --git a/paddle/fluid/operators/lookup_table_v2_op.cc b/paddle/fluid/operators/lookup_table_v2_op.cc index 122e01f146ccddbdc8e72aba67d47855ad30b0eb..4a6680d76c4de7f7f47445b593b1cf50cd6e1311 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cc +++ b/paddle/fluid/operators/lookup_table_v2_op.cc @@ -15,8 +15,8 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/lookup_table_v2_op.h" #include - #include "paddle/fluid/framework/no_need_buffer_vars_inference.h" +#include "paddle/fluid/framework/op_version_registry.h" #include "paddle/fluid/framework/var_type_inference.h" namespace paddle { @@ -196,3 +196,14 @@ REGISTER_OP_CPU_KERNEL(lookup_table_v2, ops::LookupTableV2Kernel, REGISTER_OP_CPU_KERNEL(lookup_table_v2_grad, ops::LookupTableV2GradKernel, ops::LookupTableV2GradKernel); + +/* ========================== register checkpoint ===========================*/ +REGISTER_OP_VERSION(lookup_table_v2) + .AddCheckpoint( + R"ROC(fix lookup_table_v2, add input type `int32`)ROC", + paddle::framework::compatible::OpVersionDesc() + .BugfixWithBehaviorChanged("lookup_table_v2 support input type " + "`int64`; after support input type " + "`int32/int64`")); + +/* ========================================================================== */ diff --git a/paddle/fluid/operators/lookup_table_v2_op.cu b/paddle/fluid/operators/lookup_table_v2_op.cu index b3b0f8f1960901226a2f4d5e59e7aac47907a5bf..551f0d3c6412e46deb311fac58e5b9638feb30a6 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.cu +++ b/paddle/fluid/operators/lookup_table_v2_op.cu @@ -85,6 +85,14 @@ __global__ void LookupTableV2Grad(T *table, const T *output, const int64_t *ids, } } +template +__global__ void InputTypeCovert(const T *in_ids, const int64_t K, + int64_t *out_ids) { + for (int i = 0; i < K; i++) { + out_ids[i] = (int64_t)(in_ids[i]); + } +} + template class LookupTableV2CUDAKernel : public framework::OpKernel { public: @@ -101,23 +109,37 @@ class LookupTableV2CUDAKernel : public framework::OpKernel { size_t D = table_t->dims()[1]; size_t K = ids_t->numel(); - auto *ids = ids_t->data(); - auto *table = table_t->data(); - auto *output = output_t->mutable_data(context.GetPlace()); - dim3 threads(256, 4); dim3 grids(80, 1); + // copy GPU memory to CPU pinned memory + framework::Vector ids; + ids.resize(K); + + const int64_t *ids_p = nullptr; + + if (ids_t->type() == framework::proto::VarType::INT32) { + InputTypeCovert< + int><<>>( + ids_t->data(), K, ids.MutableData(context.GetPlace())); + ids_p = ids.MutableData(context.GetPlace()); + } else { + ids_p = ids_t->data(); + } + + auto *table = table_t->data(); + auto *output = output_t->mutable_data(context.GetPlace()); + if (padding_idx == -1) LookupTableV2< T, 256, 4, 80, false><<>>( - output, table, ids, N, K, D, padding_idx); + output, table, ids_p, N, K, D, padding_idx); else LookupTableV2< T, 256, 4, 80, true><<>>( - output, table, ids, N, K, D, padding_idx); + output, table, ids_p, N, K, D, padding_idx); } }; @@ -139,16 +161,24 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel { auto *ids_data = ids->data(); int64_t ids_num = ids->numel(); - + dim3 threads(128, 8); + dim3 grids(8, 1); auto stream = dev_ctx.stream(); // copy GPU memory to CPU pinned memory framework::Vector new_rows; new_rows.resize(ids_num); auto gpu_place = BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()); - // TODO(yuyang18): Strange code here. 
- memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()), - gpu_place, ids_data, ids_num * sizeof(int64_t), stream); + if (ids->type() == framework::proto::VarType::INT32) { + InputTypeCovert< + int><<>>( + ids->data(), ids_num, + new_rows.MutableData(context.GetPlace())); + } else { + memory::Copy(gpu_place, new_rows.CUDAMutableData(context.GetPlace()), + gpu_place, ids_data, ids_num * sizeof(int64_t), stream); + } + d_table->set_rows(new_rows); auto *d_table_value = d_table->mutable_value(); @@ -177,17 +207,32 @@ class LookupTableV2GradCUDAKernel : public framework::OpKernel { int N = d_table_t->dims()[0]; int D = d_table_t->dims()[1]; int K = ids_t->numel(); - const int64_t *ids = ids_t->data(); + + dim3 threads(128, 8); + dim3 grids(8, 1); + // copy GPU memory to CPU pinned memory + framework::Vector ids; + ids.resize(K); + + const int64_t *ids_p = nullptr; + + if (ids_t->type() == framework::proto::VarType::INT32) { + InputTypeCovert< + int><<>>( + ids_t->data(), K, ids.MutableData(context.GetPlace())); + ids_p = ids.MutableData(context.GetPlace()); + } else { + ids_p = ids_t->data(); + } + const T *d_output = d_output_t->data(); T *d_table = d_table_t->mutable_data(context.GetPlace()); auto t = framework::EigenVector::Flatten(*d_table_t); t.device(*dev_ctx.eigen_device()) = t.constant(static_cast(0)); - dim3 threads(128, 8); - dim3 grids(8, 1); LookupTableV2Grad<<>>( - d_table, d_output, ids, N, K, D); + d_table, d_output, ids_p, N, K, D); } } }; diff --git a/paddle/fluid/operators/lookup_table_v2_op.h b/paddle/fluid/operators/lookup_table_v2_op.h index 9aab90d84796ca5c7f37a818595ce87fb3a554b5..092c5f3b03305608f96fcc2834ad74a3388ed7ed 100644 --- a/paddle/fluid/operators/lookup_table_v2_op.h +++ b/paddle/fluid/operators/lookup_table_v2_op.h @@ -14,6 +14,7 @@ limitations under the License. 
*/ #pragma once +#include #include #include @@ -45,84 +46,70 @@ class LookupTableV2Kernel : public framework::OpKernel { auto *output_t = context.Output("Out"); // float tensor auto *table_var = context.InputVar("W"); - auto id_name = context.InputNames("Ids").front(); - auto embedding_name = context.InputNames("W").front(); - auto out_name = context.OutputNames("Out").front(); - - // for remote prefetch - auto epmap = context.Attr>("epmap"); - auto remote_prefetch = context.Attr("remote_prefetch"); - auto table_names = context.Attr>("table_names"); + int64_t padding_idx = context.Attr("padding_idx"); + int64_t ids_numel = ids_t->numel(); - if (remote_prefetch && !epmap.empty()) { -// if epmap is not empty, then the parameter will be fetched from remote -// parameter server + std::vector ids; + ids.reserve(ids_numel); -#ifdef PADDLE_WITH_DISTRIBUTE - operators::distributed::prefetch(id_name, out_name, embedding_name, false, - table_names, epmap, context, - context.scope()); -#else - PADDLE_THROW( - "paddle is not compiled with distribute support, can not do " - "parameter prefetch!"); -#endif + if (ids_t->type() == framework::proto::VarType::INT32) { + std::transform(ids_t->data(), ids_t->data() + ids_numel, + std::back_inserter(ids), + [&](int id) { return static_cast(id); }); } else { - int64_t padding_idx = context.Attr("padding_idx"); - int64_t *ids = const_cast(ids_t->data()); - int64_t ids_numel = ids_t->numel(); - - if (table_var->IsType()) { - auto *table_t = context.Input("W"); - int64_t row_number = table_t->dims()[0]; - int64_t row_width = table_t->dims()[1]; - - auto *table = table_t->data(); - auto *output = output_t->mutable_data(context.GetPlace()); - - for (int64_t i = 0; i < ids_numel; ++i) { - if (padding_idx != kNoPadding && ids[i] == padding_idx) { - memset(output + i * row_width, 0, row_width * sizeof(T)); - } else { - PADDLE_ENFORCE_LT( - ids[i], row_number, - "Variable value (input) of OP(fluid.layers.embedding) " - "expected >= 0 and < %ld, but got %ld. Please check input " - "value.", - row_number, ids[i]); - PADDLE_ENFORCE_GE( - ids[i], 0, - "Variable value (input) of OP(fluid.layers.embedding) " - "expected >= 0 and < %ld, but got %ld. Please check input " - "value.", - row_number, ids[i]); - memcpy(output + i * row_width, table + ids[i] * row_width, - row_width * sizeof(T)); - } + framework::TensorToVector(*ids_t, &ids); + } + + if (table_var->IsType()) { + auto *table_t = context.Input("W"); + int64_t row_number = table_t->dims()[0]; + int64_t row_width = table_t->dims()[1]; + + auto *table = table_t->data(); + auto *output = output_t->mutable_data(context.GetPlace()); + + for (int64_t i = 0; i < ids_numel; ++i) { + if (padding_idx != kNoPadding && ids[i] == padding_idx) { + memset(output + i * row_width, 0, row_width * sizeof(T)); + } else { + PADDLE_ENFORCE_LT( + ids[i], row_number, + "Variable value (input) of OP(fluid.layers.embedding) " + "expected >= 0 and < %ld, but got %ld. Please check input " + "value.", + row_number, ids[i]); + PADDLE_ENFORCE_GE( + ids[i], 0, + "Variable value (input) of OP(fluid.layers.embedding) " + "expected >= 0 and < %ld, but got %ld. 
Please check input " + "value.", + row_number, ids[i]); + memcpy(output + i * row_width, table + ids[i] * row_width, + row_width * sizeof(T)); } - } else if (table_var->IsType()) { - const auto &table_t = table_var->Get(); - int64_t row_width = table_t.value().dims()[1]; - const auto *table = table_t.value().data(); - auto *output = output_t->mutable_data(context.GetPlace()); - - auto blas = math::GetBlas(context); - for (int64_t i = 0; i < ids_numel; ++i) { - if (padding_idx != kNoPadding && ids[i] == padding_idx) { - memset(output + i * row_width, 0, row_width * sizeof(T)); - } else { - PADDLE_ENFORCE_GE( - ids[i], 0, - "Variable value (input) of OP(fluid.layers.embedding) " - "expected >= 0. But received %ld", - ids[i]); - auto id_index = table_t.Index(ids[i]); - PADDLE_ENFORCE_GE( - id_index, 0, "the input key should be exists. But received %d.", - id_index); - blas.VCOPY(row_width, table + id_index * row_width, - output + i * row_width); - } + } + } else if (table_var->IsType()) { + const auto &table_t = table_var->Get(); + int64_t row_width = table_t.value().dims()[1]; + const auto *table = table_t.value().data(); + auto *output = output_t->mutable_data(context.GetPlace()); + + auto blas = math::GetBlas(context); + for (int64_t i = 0; i < ids_numel; ++i) { + if (padding_idx != kNoPadding && ids[i] == padding_idx) { + memset(output + i * row_width, 0, row_width * sizeof(T)); + } else { + PADDLE_ENFORCE_GE( + ids[i], 0, + "Variable value (input) of OP(fluid.layers.embedding) " + "expected >= 0. But received %ld", + ids[i]); + auto id_index = table_t.Index(ids[i]); + PADDLE_ENFORCE_GE(id_index, 0, + "the input key should be exists. But received %d.", + id_index); + blas.VCOPY(row_width, table + id_index * row_width, + output + i * row_width); } } } @@ -151,17 +138,23 @@ class LookupTableV2GradKernel : public framework::OpKernel { // Since paddings are not trainable and fixed in forward, the gradient of // paddings makes no sense and we don't deal with it in backward. 
if (is_sparse) { - auto *ids = context.Input("Ids"); + auto *ids_t = context.Input("Ids"); auto *d_output = context.Input(framework::GradVarName("Out")); auto *d_table = context.Output(framework::GradVarName("W")); + int64_t ids_num = ids_t->numel(); + + std::vector ids; + ids.reserve(ids_num); - auto *ids_data = ids->data(); - int64_t ids_num = ids->numel(); + if (ids_t->type() == framework::proto::VarType::INT32) { + std::transform(ids_t->data(), ids_t->data() + ids_num, + std::back_inserter(ids), + [&](int id) { return static_cast(id); }); + } else { + framework::TensorToVector(*ids_t, &ids); + } - std::vector new_rows; - new_rows.resize(ids_num); - std::memcpy(&new_rows[0], ids_data, ids_num * sizeof(int64_t)); - d_table->set_rows(new_rows); + d_table->set_rows(ids); auto *d_table_value = d_table->mutable_value(); d_table_value->Resize({ids_num, table_dim[1]}); @@ -185,11 +178,23 @@ class LookupTableV2GradKernel : public framework::OpKernel { memcpy(d_table_data, d_output_data, sizeof(T) * d_output->numel()); } else { - auto *ids = context.Input("Ids"); + auto *ids_t = context.Input("Ids"); auto *d_output = context.Input(framework::GradVarName("Out")); auto *d_table = context.Output(framework::GradVarName("W")); + int64_t ids_num = ids_t->numel(); + + std::vector ids; + ids.reserve(ids_num); + + if (ids_t->type() == framework::proto::VarType::INT32) { + std::transform(ids_t->data(), ids_t->data() + ids_num, + std::back_inserter(ids), + [&](int id) { return static_cast(id); }); + } else { + framework::TensorToVector(*ids_t, &ids); + } - auto *ids_data = ids->data(); + auto *ids_data = ids.data(); int64_t N = table_dim[0]; int64_t D = table_dim[1]; @@ -199,7 +204,7 @@ class LookupTableV2GradKernel : public framework::OpKernel { memset(d_table_data, 0, d_table->numel() * sizeof(T)); - for (int64_t i = 0; i < ids->numel(); ++i) { + for (int64_t i = 0; i < ids_num; ++i) { if (padding_idx != kNoPadding && ids_data[i] == padding_idx) { // the gradient of padding_idx should be 0, already done by memset, so // do nothing. 
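Editor's note: the lookup_table_v2 CPU kernels in the hunk above now accept either INT32 or INT64 id tensors by first normalizing the ids into an int64_t vector and only then indexing the table (the GPU path does the analogous conversion with the InputTypeCovert kernel shown earlier). A minimal, self-contained sketch of that normalization step, using plain pointers instead of Paddle tensors — the helper name NormalizeIds is illustrative and not part of the patch:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

// Normalize ids of either 32-bit or 64-bit integer type into int64_t,
// mirroring the std::transform / TensorToVector branch in the kernel above.
template <typename IdT>
std::vector<int64_t> NormalizeIds(const IdT* ids, int64_t ids_num) {
  std::vector<int64_t> out;
  out.reserve(ids_num);
  std::transform(ids, ids + ids_num, std::back_inserter(out),
                 [](IdT id) { return static_cast<int64_t>(id); });
  return out;
}

int main() {
  const std::vector<int32_t> int32_ids = {3, 0, 7};
  const std::vector<int64_t> int64_ids = {3, 0, 7};

  // Both input widths end up as the same int64_t row indices.
  auto a = NormalizeIds(int32_ids.data(), static_cast<int64_t>(int32_ids.size()));
  auto b = NormalizeIds(int64_ids.data(), static_cast<int64_t>(int64_ids.size()));
  std::cout << (a == b ? "identical" : "different") << "\n";  // prints "identical"
  return 0;
}
```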
diff --git a/paddle/fluid/operators/math/concat_and_split.h b/paddle/fluid/operators/math/concat_and_split.h index 3a5eddcbf4af699a89ae1a21571337155699a1f3..18d9a6310dd6c09905ca7fa84d98f391a84dfa2d 100644 --- a/paddle/fluid/operators/math/concat_and_split.h +++ b/paddle/fluid/operators/math/concat_and_split.h @@ -65,13 +65,14 @@ class SplitFunctor { } // namespace operators } // namespace paddle -#define FOR_ALL_TYPES(macro) \ - macro(int); \ - macro(float); \ - macro(double); \ - macro(bool); \ - macro(int64_t); \ - macro(int16_t); \ - macro(uint8_t); \ - macro(int8_t); \ - macro(::paddle::platform::float16) +#define FOR_ALL_TYPES(macro) \ + macro(int); \ + macro(float); \ + macro(double); \ + macro(bool); \ + macro(int64_t); \ + macro(int16_t); \ + macro(uint8_t); \ + macro(int8_t); \ + macro(::paddle::platform::float16); \ + macro(::paddle::platform::bfloat16) diff --git a/paddle/fluid/operators/math/math_function.cc b/paddle/fluid/operators/math/math_function.cc index 6748d0ab43f70f997b3008f34f4be743b81e8946..824e66b1eb4ae05cc74dc1cd8c21f16f286592e6 100644 --- a/paddle/fluid/operators/math/math_function.cc +++ b/paddle/fluid/operators/math/math_function.cc @@ -34,6 +34,7 @@ namespace math { using float16 = paddle::platform::float16; template struct SetConstant; +template struct SetConstant; template struct SetConstant; template struct SetConstant; template struct SetConstant; @@ -41,16 +42,18 @@ template struct SetConstant; template struct SetConstant; template struct SetConstant; -#define DEFINE_CPU_TRANS(RANK) \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ - template struct Transpose; \ +#define DEFINE_CPU_TRANS(RANK) \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ + template struct Transpose; \ template struct Transpose; DEFINE_CPU_TRANS(1); diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc index 5ca9216d0c8d6b3f773a1eb1a0cec216ca6ed4f3..487deb11b48687a91174c8d9baf072a5ca929de8 100644 --- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc @@ -33,10 +33,12 @@ class MKLDNNActivationKernel public: void Compute(const framework::ExecutionContext &ctx) const override { const auto *x = ctx.Input("X"); - PADDLE_ENFORCE_EQ(x->layout(), DataLayout::kMKLDNN, - "Wrong layout set for X tensor"); - PADDLE_ENFORCE_NE(x->format(), MKLDNNMemoryFormat::undef, - "Wrong format set for X tensor"); + PADDLE_ENFORCE_EQ( + x->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument("Wrong layout set for X tensor")); + PADDLE_ENFORCE_NE( + x->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument("Wrong format set for X tensor")); Functor functor; functor(ctx); @@ -50,9 +52,11 @@ class MKLDNNActivationGradKernel void Compute(const framework::ExecutionContext &ctx) const override { const auto *diff_y = ctx.Input(framework::GradVarName("Out")); PADDLE_ENFORCE_EQ(diff_y->layout(), DataLayout::kMKLDNN, - "Wrong layout set for Input OutGrad tensor"); + platform::errors::InvalidArgument( + "Wrong layout set for Input OutGrad tensor")); PADDLE_ENFORCE_NE(diff_y->format(), 
MKLDNNMemoryFormat::undef, - "Wrong format set for Input OutGrad tensor"); + platform::errors::InvalidArgument( + "Wrong format set for Input OutGrad tensor")); Functor functor; functor(ctx); @@ -82,7 +86,7 @@ void eltwise_forward(const framework::ExecutionContext &ctx, PADDLE_ENFORCE( x->dims().size() == 2 || x->dims().size() == 3 || x->dims().size() == 4, - "Input dim must be with 2, 3 or 4"); + platform::errors::Unimplemented("Input dim must be with 2, 3 or 4")); auto src_tz = framework::vectorize(x->dims()); diff --git a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc index 33cf00b2c01da8e346e4c7e6be81fce3fd47f54f..8a02a697cbb21b28e14f19c6202ae0777b5102de 100644 --- a/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/batch_norm_mkldnn_op.cc @@ -262,9 +262,11 @@ class BatchNormMKLDNNGradOpKernel : public paddle::framework::OpKernel { auto *diff_shift = ctx.Output(framework::GradVarName("Bias")); PADDLE_ENFORCE_EQ(diff_y->layout(), DataLayout::kMKLDNN, - "Wrong layout set for Input diff_y tensor"); + platform::errors::InvalidArgument( + "Wrong layout set for Input diff_y tensor")); PADDLE_ENFORCE_NE(diff_y->format(), MKLDNNMemoryFormat::undef, - "Wrong format set for Input diff_y tensor"); + platform::errors::InvalidArgument( + "Wrong format set for Input diff_y tensor")); auto src_tz = paddle::framework::vectorize(x->dims()); auto scale_tz = paddle::framework::vectorize(scale->dims()); diff --git a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc index 40f64800a0b81a161805857cb3e0a3855f386720..3cafb0e9fc6147626f066bbeba1b10d074a37b87 100644 --- a/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/concat_mkldnn_op.cc @@ -30,10 +30,12 @@ using platform::to_void_cast; static void EnforceLayouts(const std::vector inputs) { for (auto* input : inputs) { - PADDLE_ENFORCE_EQ(input->layout(), DataLayout::kMKLDNN, - "Wrong layout set for Input tensor"); - PADDLE_ENFORCE_NE(input->format(), MKLDNNMemoryFormat::undef, - "Wrong format set for Input tensor"); + PADDLE_ENFORCE_EQ( + input->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument("Wrong layout set for Input tensor")); + PADDLE_ENFORCE_NE( + input->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument("Wrong format set for Input tensor")); } } @@ -49,7 +51,7 @@ static platform::CPUPlace GetCpuPlace( const paddle::framework::ExecutionContext& ctx) { auto place = ctx.GetPlace(); PADDLE_ENFORCE(paddle::platform::is_cpu_place(place), - "It must use CPUPlace."); + platform::errors::InvalidArgument("It must use CPUPlace.")); return BOOST_GET_CONST(platform::CPUPlace, place); } diff --git a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc index 7d99bb7d2b7a7049c67788df4c507afc14880815..19ee8764e27b235a2fa8e0720c11bce601b030db 100644 --- a/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_mkldnn_op.cc @@ -561,7 +561,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE_EQ( !fuse_residual_conn || !force_fp32_output, true, - "residual fusion does not support force output with fp32"); + platform::errors::Unimplemented( + "residual fusion does not support force output with fp32")); auto* bias = ctx.HasInput("Bias") ? 
ctx.Input("Bias") : nullptr; @@ -625,7 +626,8 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel { ? dilations.size() == 3 && dilations[0] == 1 && dilations[1] == 1 && dilations[2] == 1 : dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1, - true, "dilation in convolution is not implemented yet"); + true, platform::errors::Unimplemented( + "dilation in convolution is not implemented yet")); const K* filter_data = filter->data(); auto scale_in_data = ctx.Attr("Scale_in"); @@ -887,7 +889,8 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { "The output_grad tensor's layout should be %d, but got %d.", DataLayout::kMKLDNN, output_grad->layout())); PADDLE_ENFORCE_NE(output_grad->format(), MKLDNNMemoryFormat::undef, - "Wrong format set for output_grad tensor"); + platform::errors::InvalidArgument( + "Wrong format set for output_grad tensor")); PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, @@ -1052,7 +1055,11 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel { astream.wait(); filter_grad->set_layout(DataLayout::kMKLDNN); - filter_grad->set_format(GetMKLDNNFormat(*diff_weights_memory_p)); + // in OneDNN groups in convolution are treated as separate dimension + // which is not the case in paddlepaddle + auto filter_fmt = GetMKLDNNFormat(*diff_weights_memory_p); + filter_grad->set_format(platform::MKLDNNFormatForSize( + g > 1 ? weights_tz.size() - 1 : weights_tz.size(), filter_fmt)); } if (input_grad) { auto weights_memory_p = handler.AcquireWeightsMemoryFromDataPrimitive( diff --git a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc index 48279658c80e93428f940c40e61d7b9af23f4ee3..56537900216a8a4e4e96791123c7d50da621ab62 100644 --- a/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/conv_transpose_mkldnn_op.cc @@ -117,7 +117,8 @@ class ConvTransposeMKLDNNOpKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE( dilations.size() == 2 && dilations[0] == 1 && dilations[1] == 1, - "dilation in convolution is not implemented yet"); + platform::errors::Unimplemented( + "dilation in convolution is not implemented yet")); const T* input_data = input->data(); const T* filter_data = filter->data(); diff --git a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc index 2a8b332521804ccebdbd4e6914b2763abfb5dbdc..9df30b3295c00e69a956ee84770dfeb19a83487c 100644 --- a/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/pool_mkldnn_op.cc @@ -83,19 +83,24 @@ class PoolMKLDNNGradOpKernel : public paddle::framework::OpKernel { const Tensor* out_grad = ctx.Input(framework::GradVarName("Out")); Tensor* in_x_grad = ctx.Output(framework::GradVarName("X")); - PADDLE_ENFORCE_EQ(in_x->layout(), DataLayout::kMKLDNN, - "Wrong layout set for Input tensor"); - PADDLE_ENFORCE_NE(in_x->format(), MKLDNNMemoryFormat::undef, - "Wrong format set for Input tensor"); + PADDLE_ENFORCE_EQ( + in_x->layout(), DataLayout::kMKLDNN, + platform::errors::InvalidArgument("Wrong layout set for Input tensor")); + PADDLE_ENFORCE_NE( + in_x->format(), MKLDNNMemoryFormat::undef, + platform::errors::InvalidArgument("Wrong format set for Input tensor")); PADDLE_ENFORCE_EQ(out_grad->layout(), DataLayout::kMKLDNN, - "Wrong layout set for Input output_grad tensor"); + platform::errors::InvalidArgument( + "Wrong layout set for Input output_grad tensor")); PADDLE_ENFORCE_NE(out_grad->format(), 
MKLDNNMemoryFormat::undef, - "Wrong format set for Input output_grad tensor"); + platform::errors::InvalidArgument( + "Wrong format set for Input output_grad tensor")); PADDLE_ENFORCE_EQ( ctx.Attr("is_test"), false, - "is_test attribute should be set to False in training phase."); + platform::errors::InvalidArgument( + "is_test attribute should be set to False in training phase.")); std::string pooling_type = ctx.Attr("pooling_type"); diff --git a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc index 4d825e4ee279bc2c505cfabff1917d1a5319d1dd..5014381a4e215917883f45288de4482db5cbf79c 100644 --- a/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/softmax_mkldnn_op.cc @@ -140,7 +140,8 @@ class SoftmaxMKLDNNGradKernel : public paddle::framework::OpKernel { PADDLE_ENFORCE_EQ( dout->dims(), dx->dims(), - "The shape of softmax_grad's input and output must be identical."); + platform::errors::InvalidArgument( + "The shape of softmax_grad's input and output must be identical.")); auto dims = dout->dims(); // input and output share the same shape const int axis = CanonicalAxis(ctx.Attr("axis"), dims.size()); diff --git a/paddle/fluid/operators/randint_op.cu b/paddle/fluid/operators/randint_op.cu index a07a92621e6b3726be518df6abcec58257a91489..40e390b0b87246bbaa8474262df8ba5576297385 100644 --- a/paddle/fluid/operators/randint_op.cu +++ b/paddle/fluid/operators/randint_op.cu @@ -13,6 +13,7 @@ // limitations under the License. #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/uniform_random_op.h" @@ -49,15 +50,23 @@ class GPURandintKernel : public framework::OpKernel { int64_t size = out->numel(); unsigned int seed = static_cast(context.Attr("seed")); + + /* std::minstd_rand engine; if (seed == 0) { std::random_device rd; seed = rd(); } engine.seed(seed); + */ + std::uniform_int_distribution<> dist(context.Attr("low"), context.Attr("high") - 1); - for (int64_t i = 0; i < size; ++i) data[i] = dist(engine); + auto engine = framework::GetCPURandomEngine(seed); + + for (int64_t i = 0; i < size; ++i) { + data[i] = dist(*engine); + } if (platform::is_gpu_place(context.GetPlace())) { // Copy tensor to out diff --git a/paddle/fluid/operators/truncated_gaussian_random_op.cu b/paddle/fluid/operators/truncated_gaussian_random_op.cu index 5a3510babe4d57b9e80f0e7898df98033834ca15..a838c30771a5c1229061a58b12c6777a3d24c6f3 100644 --- a/paddle/fluid/operators/truncated_gaussian_random_op.cu +++ b/paddle/fluid/operators/truncated_gaussian_random_op.cu @@ -15,6 +15,7 @@ limitations under the License. 
*/ #include #include #include +#include "paddle/fluid/framework/generator.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" @@ -46,6 +47,37 @@ struct TruncatedNormal { } }; +template +struct TruncatedNormalOffset { + T mean, std; + T a_normal_cdf; + T b_normal_cdf; + unsigned int seed; + T numeric_min; + int offset_; + + __host__ __device__ TruncatedNormalOffset(T mean, T std, T numeric_min, + int seed, int offset) + : mean(mean), + std(std), + seed(seed), + numeric_min(numeric_min), + offset_(offset) { + a_normal_cdf = (1.0 + erff(-2.0 / sqrtf(2.0))) / 2.0; + b_normal_cdf = (1.0 + erff(2.0 / sqrtf(2.0))) / 2.0; + } + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed); + thrust::uniform_real_distribution dist(numeric_min, 1); + rng.discard(n); + T value = dist(rng); + auto p = a_normal_cdf + (b_normal_cdf - a_normal_cdf) * value; + return std::sqrt(2.0) * erfinvf(2 * p - 1) * std + mean; + } +}; + template class GPUTruncatedGaussianRandomKernel : public framework::OpKernel { public: @@ -54,14 +86,31 @@ class GPUTruncatedGaussianRandomKernel : public framework::OpKernel { T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); + bool seed_flag = false; if (seed == 0) { std::random_device rd; seed = rd(); + seed_flag = true; } T mean = static_cast(context.Attr("mean")); T std = static_cast(context.Attr("std")); thrust::counting_iterator index_sequence_begin(0); int64_t size = tensor->numel(); + + int device_id = + BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId(); + auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); + + if (gen_cuda->GetIsInitPy() && seed_flag) { + auto seed_offset = gen_cuda->IncrementOffset(1); + int gen_offset = size * seed_offset.second; + thrust::transform( + index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + TruncatedNormalOffset(mean, std, std::numeric_limits::min(), + seed_offset.first, seed_offset.second)); + } + thrust::transform( index_sequence_begin, index_sequence_begin + size, thrust::device_ptr(data), diff --git a/paddle/fluid/operators/uniform_random_op.cu b/paddle/fluid/operators/uniform_random_op.cu index 4df1e0ffeb97564803f452114d52ab03d0464f8a..6237137cccbc6840b345c9e26dda1ccdc8df43b0 100644 --- a/paddle/fluid/operators/uniform_random_op.cu +++ b/paddle/fluid/operators/uniform_random_op.cu @@ -51,6 +51,39 @@ struct UniformGenerator { } }; +template +struct UniformGeneratorOffset { + T min_, max_; + unsigned int seed_; + T diag_val_; + unsigned int diag_num_; + unsigned int diag_step_; + int offset_; + __host__ __device__ UniformGeneratorOffset(T min, T max, int seed, + int diag_num, int diag_step, + T diag_val, int offset) + : min_(min), + max_(max), + seed_(seed), + diag_num_(diag_num), + diag_step_(diag_step), + diag_val_(diag_val), + offset_(offset) {} + + __host__ __device__ T operator()(const unsigned int n) const { + thrust::minstd_rand rng; + rng.seed(seed_); + thrust::uniform_real_distribution dist(min_, max_); + rng.discard(n + offset_); + T out = dist(rng); + unsigned int remainder = n % (diag_step_ + 1); + if (remainder == 0 && diag_num_ > n / (diag_step_ + 1)) { + out = diag_val_; + } + return out; + } +}; + // It seems that Eigen::Tensor::random in GPU will SEGFAULT. // Use std::random and thrust::random(thrust is a std library in CUDA) to // implement uniform random. 
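Editor's note: the UniformGeneratorOffset functor above makes every output element reproducible on its own — the engine is seeded once with a fixed seed and then skipped ahead by n + offset before a single draw, so a later launch that passes a larger cumulative offset continues the stream instead of repeating it. Below is a host-side sketch of that skip-ahead idea using std::minstd_rand; it is illustrative only (the kernel itself uses thrust::minstd_rand on the device), and in the kernels above the cumulative offset appears to come from the generator's IncrementOffset call, which is what keeps successive launches from reusing the same stream positions.

```cpp
#include <iostream>
#include <random>

// Draw the n-th value of a seeded stream by skipping ahead, mirroring
// rng.discard(n + offset_) in the UniformGeneratorOffset functor above.
float NthUniform(unsigned int seed, unsigned long long n, float lo, float hi) {
  std::minstd_rand rng(seed);
  rng.discard(n);  // element n does not depend on how other elements are drawn
  std::uniform_real_distribution<float> dist(lo, hi);
  return dist(rng);
}

int main() {
  const unsigned int seed = 2020;
  const int size = 4;

  // First launch: offset 0, logical elements 0..3.
  for (int n = 0; n < size; ++n)
    std::cout << NthUniform(seed, n, 0.f, 1.f) << " ";
  std::cout << "\n";

  // Second launch: offset advanced by the first launch's size, so the
  // values continue the stream rather than repeating it.
  const int offset = size;
  for (int n = 0; n < size; ++n)
    std::cout << NthUniform(seed, n + offset, 0.f, 1.f) << " ";
  std::cout << "\n";
  return 0;
}
```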
@@ -89,10 +122,11 @@ class GPUUniformRandomKernel : public framework::OpKernel { } T* data = tensor->mutable_data(context.GetPlace()); unsigned int seed = static_cast(context.Attr("seed")); - + bool seed_flag = false; if (seed == 0) { std::random_device rd; seed = rd(); + seed_flag = true; } T min = static_cast(context.Attr("min")); @@ -104,10 +138,23 @@ class GPUUniformRandomKernel : public framework::OpKernel { T diag_val = static_cast(context.Attr("diag_val")); thrust::counting_iterator index_sequence_begin(0); int64_t size = tensor->numel(); - thrust::transform( - index_sequence_begin, index_sequence_begin + size, - thrust::device_ptr(data), - UniformGenerator(min, max, seed, diag_num, diag_step, diag_val)); + int device_id = + BOOST_GET_CONST(platform::CUDAPlace, context.GetPlace()).GetDeviceId(); + auto gen_cuda = framework::GetDefaultCUDAGenerator(device_id); + if (gen_cuda->GetIsInitPy() && seed_flag) { + auto seed_offset = gen_cuda->IncrementOffset(1); + int gen_offset = size * seed_offset.second; + thrust::transform( + index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + UniformGeneratorOffset(min, max, seed_offset.first, diag_num, + diag_step, diag_val, gen_offset)); + } else { + thrust::transform( + index_sequence_begin, index_sequence_begin + size, + thrust::device_ptr(data), + UniformGenerator(min, max, seed, diag_num, diag_step, diag_val)); + } } }; diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index c33e7c6068648d019a38450a92fec79032411598..ee1361e3618302816200efc759ebd18ee05c9274 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -13,9 +13,11 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/unsqueeze_op.h" + #include #include #include + #include "paddle/fluid/framework/op_registry.h" namespace paddle { @@ -327,6 +329,7 @@ REGISTER_OPERATOR(unsqueeze2_grad, ops::Unsqueeze2GradOp, REGISTER_OP_CPU_KERNEL( unsqueeze, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -334,12 +337,14 @@ REGISTER_OP_CPU_KERNEL( unsqueeze_grad, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, + ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel); REGISTER_OP_CPU_KERNEL( unsqueeze2, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -347,6 +352,7 @@ REGISTER_OP_CPU_KERNEL( unsqueeze2_grad, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, + ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel); diff --git a/paddle/fluid/operators/unsqueeze_op.cu.cc b/paddle/fluid/operators/unsqueeze_op.cu.cc index 3258de53b8b7cda994c9555bf6a62502f3c04c23..0e8f47a692380cc96a371bb7a5319af89a3d28c4 100644 --- a/paddle/fluid/operators/unsqueeze_op.cu.cc +++ b/paddle/fluid/operators/unsqueeze_op.cu.cc @@ -21,6 +21,7 @@ REGISTER_OP_CUDA_KERNEL( unsqueeze, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -30,6 +31,7 @@ REGISTER_OP_CUDA_KERNEL( ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, + ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel); @@ -38,6 +40,7 @@ REGISTER_OP_CUDA_KERNEL( ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel); @@ -47,6 +50,7 @@ REGISTER_OP_CUDA_KERNEL( ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, + ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel); diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 652b4dd47daa8aecdcae43e8c910d7dd61bbb64d..ef827fd74903afd007c864307e942749e3eb0bd1 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -136,6 +136,8 @@ cc_test(profiler_test SRCS profiler_test.cc DEPS profiler) nv_test(float16_gpu_test SRCS float16_test.cu DEPS lod_tensor) cc_test(float16_test SRCS float16_test.cc DEPS lod_tensor) +cc_test(bfloat16_test SRCS bfloat16_test.cc DEPS lod_tensor) + nv_test(test_limit_gpu_memory SRCS test_limit_gpu_memory.cu DEPS gpu_info flags) nv_library(cuda_device_guard SRCS cuda_device_guard.cc DEPS gpu_info) diff --git a/paddle/fluid/platform/bfloat16.h b/paddle/fluid/platform/bfloat16.h new file mode 100644 index 0000000000000000000000000000000000000000..742329abb2dae20437120c0d4ba5975d41b0a7c9 --- /dev/null +++ b/paddle/fluid/platform/bfloat16.h @@ -0,0 +1,439 @@ +// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#if !defined(_WIN32) +#define PADDLE_ALIGN(x) __attribute__((aligned(x))) +#else +#define PADDLE_ALIGN(x) __declspec(align(x)) +#endif + +#include +#include "paddle/fluid/platform/hostdevice.h" +#include "unsupported/Eigen/CXX11/Tensor" + +namespace paddle { +namespace platform { + +struct PADDLE_ALIGN(2) bfloat16 { + public: + uint16_t x; + + bfloat16() = default; + bfloat16(const bfloat16& o) = default; + bfloat16& operator=(const bfloat16& o) = default; + bfloat16(bfloat16&& o) = default; + bfloat16& operator=(bfloat16&& o) = default; + ~bfloat16() = default; + + HOSTDEVICE inline explicit bfloat16(float val) { + std::memcpy(&x, reinterpret_cast(&val) + 2, 2); + } + + template + HOSTDEVICE inline explicit bfloat16(const T& val) + : x(bfloat16(static_cast(val)).x) {} + + HOSTDEVICE inline bfloat16& operator=(bool b) { + x = b ? 0x3f80 : 0; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(int8_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(uint8_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(int16_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(uint16_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(int32_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(uint32_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(int64_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(uint64_t val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(float val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline bfloat16& operator=(double val) { + x = bfloat16(val).x; + return *this; + } + + HOSTDEVICE inline explicit operator float() const { + float val = 0.f; + uint16_t temp = x; + memcpy(reinterpret_cast(&val) + 2, reinterpret_cast(&temp), + 2); + return val; + } + + HOSTDEVICE inline explicit operator bool() const { return (x & 0x7fff) != 0; } + + HOSTDEVICE inline explicit operator int8_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator uint8_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator int16_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator uint16_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator int32_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator uint32_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator int64_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator uint64_t() const { + return static_cast(static_cast(*this)); + } + + HOSTDEVICE inline explicit operator double() const { + return static_cast(static_cast(*this)); + } +}; + 
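Editor's note: the bfloat16 struct above stores only the upper 16 bits of an IEEE-754 binary32 value; the memcpy at byte offset 2 in its float constructor and conversion operator assumes a little-endian host. A small standalone sketch of the same truncation scheme, written with shifts so it is endianness-neutral — the helper names here are illustrative, not Paddle APIs:

```cpp
#include <cstdint>
#include <cstring>
#include <iostream>

// Truncate a float to bfloat16 by keeping its upper 16 bits,
// and widen back by placing those bits into the high half of a float.
uint16_t FloatToBF16(float v) {
  uint32_t bits;
  std::memcpy(&bits, &v, sizeof(bits));
  return static_cast<uint16_t>(bits >> 16);  // drop the low 16 mantissa bits
}

float BF16ToFloat(uint16_t x) {
  uint32_t bits = static_cast<uint32_t>(x) << 16;  // low mantissa bits become zero
  float v;
  std::memcpy(&v, &bits, sizeof(v));
  return v;
}

int main() {
  std::cout << std::hex << FloatToBF16(1.0f) << "\n";  // 3f80, matching bfloat16(1.0f).x
  std::cout << std::hex << FloatToBF16(0.5f) << "\n";  // 3f00
  // The round trip loses the low mantissa bits: 0.33333f comes back as ~0.332031.
  std::cout << std::dec << BF16ToFloat(FloatToBF16(0.33333f)) << "\n";
  return 0;
}
```

Truncation (rather than round-to-nearest) is the simplest possible conversion, and it is consistent with the bit patterns the unit tests further below expect, e.g. bfloat16(0.33333f).x == 0x3eaa.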
+HOSTDEVICE inline bfloat16 operator+(const bfloat16& a, const bfloat16& b) { + return bfloat16(static_cast(a) + static_cast(b)); +} + +HOSTDEVICE inline bfloat16 operator-(const bfloat16& a, const bfloat16& b) { + return bfloat16(static_cast(a) - static_cast(b)); +} + +HOSTDEVICE inline bfloat16 operator*(const bfloat16& a, const bfloat16& b) { + return bfloat16(static_cast(a) * static_cast(b)); +} + +HOSTDEVICE inline bfloat16 operator/(const bfloat16& a, const bfloat16& b) { + return bfloat16(static_cast(a) / static_cast(b)); +} + +HOSTDEVICE inline bfloat16 operator-(const bfloat16& a) { + bfloat16 res; + res.x = a.x ^ 0x8000; + return res; +} + +HOSTDEVICE inline bfloat16& operator+=(bfloat16& a, // NOLINT + const bfloat16& b) { + a = bfloat16(static_cast(a) + static_cast(b)); + return a; +} + +HOSTDEVICE inline bfloat16& operator-=(bfloat16& a, // NOLINT + const bfloat16& b) { + a = bfloat16(static_cast(a) - static_cast(b)); + return a; +} + +HOSTDEVICE inline bfloat16& operator*=(bfloat16& a, // NOLINT + const bfloat16& b) { + a = bfloat16(static_cast(a) * static_cast(b)); + return a; +} + +HOSTDEVICE inline bfloat16& operator/=(bfloat16& a, // NOLINT + const bfloat16& b) { + a = bfloat16(static_cast(a) / static_cast(b)); + return a; +} + +HOSTDEVICE inline bfloat16 raw_uint16_to_bfloat16(uint16_t a) { + bfloat16 res; + res.x = a; + return res; +} + +HOSTDEVICE inline bool operator==(const bfloat16& a, const bfloat16& b) { + return static_cast(a) == static_cast(b); +} + +HOSTDEVICE inline bool operator!=(const bfloat16& a, const bfloat16& b) { + return static_cast(a) != static_cast(b); +} + +HOSTDEVICE inline bool operator<(const bfloat16& a, const bfloat16& b) { + return static_cast(a) < static_cast(b); +} + +HOSTDEVICE inline bool operator<=(const bfloat16& a, const bfloat16& b) { + return static_cast(a) <= static_cast(b); +} + +HOSTDEVICE inline bool operator>(const bfloat16& a, const bfloat16& b) { + return static_cast(a) > static_cast(b); +} + +HOSTDEVICE inline bool operator>=(const bfloat16& a, const bfloat16& b) { + return static_cast(a) >= static_cast(b); +} + +HOSTDEVICE inline bool(isnan)(const bfloat16& a) { + return (a.x & 0x7FFF) > 0x7F80; +} + +HOSTDEVICE inline bool(isinf)(const bfloat16& a) { + return (a.x & 0x7F80) == 0x7F80; +} + +HOSTDEVICE inline bool(isfinite)(const bfloat16& a) { + return !((isnan)(a)) && !((isinf)(a)); +} + +inline std::ostream& operator<<(std::ostream& os, const bfloat16& a) { + os << a.x; + return os; +} + +} // namespace platform +} // namespace paddle + +namespace std { + +template <> +struct is_pod { + static const bool value = + is_trivial::value && + is_standard_layout::value; +}; + +template <> +struct is_floating_point + : std::integral_constant< + bool, std::is_same::type>::value> {}; +template <> +struct is_signed { + static const bool value = true; +}; + +template <> +struct is_unsigned { + static const bool value = false; +}; + +inline bool isnan(const paddle::platform::bfloat16& a) { + return paddle::platform::isnan(a); +} + +inline bool isinf(const paddle::platform::bfloat16& a) { + return paddle::platform::isinf(a); +} + +template <> +struct numeric_limits { + static const bool is_specialized = true; + static const bool is_signed = true; + static const bool is_integer = false; + static const bool is_exact = false; + static const bool has_infinity = true; + static const bool has_quiet_NaN = true; + static const bool has_signaling_NaN = true; + static const float_denorm_style has_denorm = denorm_present; + static const bool 
has_denorm_loss = false; + static const std::float_round_style round_style = std::round_to_nearest; + static const bool is_iec559 = false; + static const bool is_bounded = false; + static const bool is_modulo = false; + static const int digits = 8; + static const int digits10 = 2; + static const int max_digits10 = 9; + static const int radix = 2; + static const int min_exponent = -125; + static const int min_exponent10 = -37; + static const int max_exponent = 128; + static const int max_exponent10 = 38; + static const bool traps = true; + static const bool tinyness_before = false; + + static paddle::platform::bfloat16(min)() { + return paddle::platform::raw_uint16_to_bfloat16(0x007f); + } + static paddle::platform::bfloat16 lowest() { + return paddle::platform::raw_uint16_to_bfloat16(0xff7f); + } + static paddle::platform::bfloat16(max)() { + return paddle::platform::raw_uint16_to_bfloat16(0x7f7f); + } + static paddle::platform::bfloat16 epsilon() { + return paddle::platform::raw_uint16_to_bfloat16(0x3400); + } + static paddle::platform::bfloat16 round_error() { + return paddle::platform::bfloat16(0.5); + } + static paddle::platform::bfloat16 infinity() { + return paddle::platform::raw_uint16_to_bfloat16(0x7f80); + } + static paddle::platform::bfloat16 quiet_NaN() { + return paddle::platform::raw_uint16_to_bfloat16(0xffc1); + } + static paddle::platform::bfloat16 signaling_NaN() { + return paddle::platform::raw_uint16_to_bfloat16(0xff81); + } + static paddle::platform::bfloat16 denorm_min() { + return paddle::platform::raw_uint16_to_bfloat16(0x0001); + } +}; + +} // namespace std + +namespace Eigen { + +using bfloat16 = paddle::platform::bfloat16; + +template <> +struct NumTraits : GenericNumTraits { + enum { + IsSigned = true, + IsInteger = false, + IsComplex = false, + RequireInitialization = false + }; + + HOSTDEVICE static inline bfloat16 epsilon() { + return paddle::platform::raw_uint16_to_bfloat16(0x3400); + } + HOSTDEVICE static inline bfloat16 dummy_precision() { + return bfloat16(1e-5f); + } + HOSTDEVICE static inline bfloat16 highest() { + return paddle::platform::raw_uint16_to_bfloat16(0x7f7f); + } + HOSTDEVICE static inline bfloat16 lowest() { + return paddle::platform::raw_uint16_to_bfloat16(0xff7f); + } + HOSTDEVICE static inline bfloat16 infinity() { + return paddle::platform::raw_uint16_to_bfloat16(0x7f80); + } + HOSTDEVICE static inline bfloat16 quiet_NaN() { + return paddle::platform::raw_uint16_to_bfloat16(0xffc1); + } +}; +namespace numext { + +template <> +HOSTDEVICE inline bool(isnan)(const bfloat16& a) { + return (paddle::platform::isnan)(a); +} + +template <> +HOSTDEVICE inline bool(isinf)(const bfloat16& a) { + return (paddle::platform::isinf)(a); +} + +template <> +HOSTDEVICE inline bool(isfinite)(const bfloat16& a) { + return (paddle::platform::isfinite)(a); +} + +template <> +HOSTDEVICE inline bfloat16 exp(const bfloat16& a) { + return bfloat16(::expf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 erf(const bfloat16& a) { + return bfloat16(::erff(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 log(const bfloat16& a) { + return bfloat16(::logf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 tanh(const bfloat16& a) { + return bfloat16(::tanhf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 sqrt(const bfloat16& a) { + return bfloat16(::sqrtf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 ceil(const bfloat16& a) { + return bfloat16(::ceilf(static_cast(a))); +} + +template <> 
+HOSTDEVICE inline bfloat16 floor(const bfloat16& a) { + return bfloat16(::floorf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 round(const bfloat16& a) { + return bfloat16(::roundf(static_cast(a))); +} + +template <> +HOSTDEVICE inline bfloat16 pow(const bfloat16& a, const bfloat16& b) { + return bfloat16(::powf(static_cast(a), static_cast(b))); +} + +template <> +HOSTDEVICE inline bfloat16 abs(const bfloat16& a) { + return bfloat16(::fabs(static_cast(a))); +} + +} // namespace numext +} // namespace Eigen diff --git a/paddle/fluid/platform/bfloat16_test.cc b/paddle/fluid/platform/bfloat16_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..bdb508ee33630004daae132fcdcf71146a50e640 --- /dev/null +++ b/paddle/fluid/platform/bfloat16_test.cc @@ -0,0 +1,162 @@ +/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/bfloat16.h" + +#include + +#define GLOG_NO_ABBREVIATED_SEVERITIES // msvc conflict logging with windows.h +#include "gtest/gtest.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/init.h" + +namespace paddle { +namespace platform { + +using bfloat16 = paddle::platform::bfloat16; + +TEST(bfloat16, conversion_cpu) { + // Conversion from float + EXPECT_EQ(bfloat16(1.0f).x, 0x3f80); + EXPECT_EQ(bfloat16(0.5f).x, 0x3f00); + EXPECT_EQ(bfloat16(0.33333f).x, 0x3eaa); + EXPECT_EQ(bfloat16(0.0f).x, 0x0000); + EXPECT_EQ(bfloat16(-0.0f).x, 0x8000); + EXPECT_EQ(bfloat16(65504.0f).x, 0x477f); + EXPECT_EQ(bfloat16(65536.0f).x, 0x4780); + + // Conversion from double + EXPECT_EQ(bfloat16(1.0).x, 0x3f80); + EXPECT_EQ(bfloat16(0.5).x, 0x3f00); + EXPECT_EQ(bfloat16(0.33333).x, 0x3eaa); + EXPECT_EQ(bfloat16(0.0).x, 0x0000); + EXPECT_EQ(bfloat16(-0.0).x, 0x8000); + EXPECT_EQ(bfloat16(65504.0).x, 0x477f); + EXPECT_EQ(bfloat16(65536.0).x, 0x4780); + + // Conversion from int + EXPECT_EQ(bfloat16(-1).x, 0xbf80); + EXPECT_EQ(bfloat16(0).x, 0x0000); + EXPECT_EQ(bfloat16(1).x, 0x3f80); + EXPECT_EQ(bfloat16(2).x, 0x4000); + EXPECT_EQ(bfloat16(3).x, 0x4040); + + // Conversion from bool + EXPECT_EQ(bfloat16(true).x, 0x3f80); + EXPECT_EQ(bfloat16(false).x, 0x0000); + + // Assignment operator + bfloat16 v_assign; + v_assign = bfloat16(0.f); + EXPECT_EQ(v_assign.x, 0x0000); + v_assign = 0.5f; + EXPECT_EQ(v_assign.x, 0x3f00); + v_assign = 0.33333; + EXPECT_EQ(v_assign.x, 0x3eaa); + v_assign = -1; + EXPECT_EQ(v_assign.x, 0xbf80); + + // Conversion operator + EXPECT_EQ(static_cast(bfloat16(0.5f)), 0.5f); + EXPECT_NEAR(static_cast(bfloat16(0.33333)), 0.33333, 0.01); + EXPECT_EQ(static_cast(bfloat16(-1)), -1); + EXPECT_EQ(static_cast(bfloat16(true)), true); +} + +TEST(bfloat16, arithmetic_cpu) { + EXPECT_NEAR(static_cast(bfloat16(1) + bfloat16(1)), 2, 0.001); + EXPECT_EQ(static_cast(bfloat16(5) + bfloat16(-5)), 0); + EXPECT_NEAR(static_cast(bfloat16(0.33333f) + bfloat16(0.66667f)), 1.0f, + 0.01); + EXPECT_EQ(static_cast(bfloat16(3) - 
bfloat16(5)), -2); + EXPECT_NEAR(static_cast(bfloat16(0.66667f) - bfloat16(0.33333f)), + 0.33334f, 0.01); + EXPECT_NEAR(static_cast(bfloat16(3.3f) * bfloat16(2.0f)), 6.6f, 0.01); + EXPECT_NEAR(static_cast(bfloat16(-2.1f) * bfloat16(-3.0f)), 6.3f, 0.1); + EXPECT_NEAR(static_cast(bfloat16(2.0f) / bfloat16(3.0f)), 0.66667f, + 0.01); + EXPECT_EQ(static_cast(bfloat16(1.0f) / bfloat16(2.0f)), 0.5f); + EXPECT_EQ(static_cast(-bfloat16(512.0f)), -512.0f); + EXPECT_EQ(static_cast(-bfloat16(-512.0f)), 512.0f); +} + +TEST(bfloat16, comparison_cpu) { + EXPECT_TRUE(bfloat16(1.0f) == bfloat16(1.0f)); + EXPECT_FALSE(bfloat16(-1.0f) == bfloat16(-0.5f)); + EXPECT_TRUE(bfloat16(1.0f) != bfloat16(0.5f)); + EXPECT_FALSE(bfloat16(-1.0f) != bfloat16(-1.0f)); + EXPECT_TRUE(bfloat16(1.0f) < bfloat16(2.0f)); + EXPECT_FALSE(bfloat16(-1.0f) < bfloat16(-1.0f)); + EXPECT_TRUE(bfloat16(1.0f) <= bfloat16(1.0f)); + EXPECT_TRUE(bfloat16(2.0f) > bfloat16(1.0f)); + EXPECT_FALSE(bfloat16(-2.0f) > bfloat16(-2.0f)); + EXPECT_TRUE(bfloat16(2.0f) >= bfloat16(2.0f)); +} + +TEST(bfloat16, lod_tensor_cpu) { + framework::LoDTensor lod_tensor; + + std::vector input_data = {bfloat16(1.0f), bfloat16(0.5f), + bfloat16(0.33333f), bfloat16(0.0f)}; + EXPECT_EQ(input_data[0].x, 0x3f80); + EXPECT_EQ(input_data[1].x, 0x3f00); + EXPECT_EQ(input_data[2].x, 0x3eaa); + EXPECT_EQ(input_data[3].x, 0x0000); + + lod_tensor.Resize({4, 1}); + lod_tensor.set_lod(framework::LoD({{0, 2, 4}})); + bfloat16* data_ptr = lod_tensor.mutable_data(CPUPlace()); + + EXPECT_NE(data_ptr, nullptr); + EXPECT_EQ(input_data.size(), static_cast(lod_tensor.numel())); + for (size_t i = 0; i < input_data.size(); ++i) { + data_ptr[i] = input_data[i]; + EXPECT_EQ(data_ptr[i].x, input_data[i].x); + } +} + +TEST(bfloat16, floating) { + // compile time assert. + PADDLE_ENFORCE_EQ( + std::is_floating_point::value, true, + platform::errors::Fatal("std::is_floating_point with bfloat16 data type " + "should be equal to true but it is not")); +} + +TEST(bfloat16, print) { + bfloat16 a = bfloat16(1.0f); + std::cout << a << std::endl; +} + +// CPU test +TEST(bfloat16, isinf) { + bfloat16 a; + a.x = 0x7f80; + bfloat16 b = bfloat16(INFINITY); + bfloat16 c = static_cast(INFINITY); + EXPECT_EQ(std::isinf(a), true); + EXPECT_EQ(std::isinf(b), true); + EXPECT_EQ(std::isinf(c), true); +} + +TEST(bfloat16, isnan) { + bfloat16 a; + a.x = 0x7fff; + bfloat16 b = bfloat16(NAN); + bfloat16 c = static_cast(NAN); + EXPECT_EQ(std::isnan(a), true); + EXPECT_EQ(std::isnan(b), true); + EXPECT_EQ(std::isnan(c), true); +} + +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/dynload/cublas.h b/paddle/fluid/platform/dynload/cublas.h index 7e32720c1d733411178c102d5c4500f722e7d005..562e7542012247c86add9e64f182d857ea969c60 100644 --- a/paddle/fluid/platform/dynload/cublas.h +++ b/paddle/fluid/platform/dynload/cublas.h @@ -38,14 +38,15 @@ extern void *cublas_dso_handle; */ #define DECLARE_DYNAMIC_LOAD_CUBLAS_WRAP(__name) \ struct DynLoad__##__name { \ - using FUNC_TYPE = decltype(&::__name); \ template \ - inline cublasStatus_t operator()(Args... args) { \ + inline auto operator()(Args... args) -> DECLARE_TYPE(__name, args...) 
{ \ + using cublas_func = \ + decltype(::__name(std::declval()...)) (*)(Args...); \ std::call_once(cublas_dso_flag, []() { \ cublas_dso_handle = paddle::platform::dynload::GetCublasDsoHandle(); \ }); \ static void *p_##__name = dlsym(cublas_dso_handle, #__name); \ - return reinterpret_cast(p_##__name)(args...); \ + return reinterpret_cast(p_##__name)(args...); \ } \ }; \ extern DynLoad__##__name __name diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h index 3782eb684f21f8c09e9dac124082ae596fe5d1bc..8fb66c6f34bd8453f1aceb731bb1cd94b8e75a69 100644 --- a/paddle/fluid/platform/mkldnn_helper.h +++ b/paddle/fluid/platform/mkldnn_helper.h @@ -161,6 +161,12 @@ inline mkldnn::memory::data_type MKLDNNGetDataType() { return mkldnn::memory::data_type::u8; } +template <> +inline mkldnn::memory::data_type +MKLDNNGetDataType() { + return mkldnn::memory::data_type::bf16; +} + inline void Reorder(mkldnn::memory src, mkldnn::memory dst, const mkldnn::engine& engine) { auto reorder_prim = mkldnn::reorder(src, dst); diff --git a/paddle/fluid/pybind/generator_py.cc b/paddle/fluid/pybind/generator_py.cc index 90b7f501052530a306ba22ea6a244f0ef8fad563..67121e24089f7c6c5b8de985da89039eca85f094 100644 --- a/paddle/fluid/pybind/generator_py.cc +++ b/paddle/fluid/pybind/generator_py.cc @@ -59,6 +59,7 @@ void BindGenerator(py::module* m_ptr) { .def_property("_is_init_py", &framework::Generator::GetIsInitPy, &framework::Generator::SetIsInitPy); m.def("default_cpu_generator", &framework::DefaultCPUGenerator); -} // end Generator -} // end namespace pybind + m.def("default_cuda_generator", &framework::GetDefaultCUDAGenerator); +} +} // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/tensor_py.h b/paddle/fluid/pybind/tensor_py.h index 4377a8c2cef5aab7a200955cd25830d448014817..5ee15073267b6eac8978022a70ead5d0f439c62f 100644 --- a/paddle/fluid/pybind/tensor_py.h +++ b/paddle/fluid/pybind/tensor_py.h @@ -26,6 +26,7 @@ limitations under the License. */ #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/concat_and_split.h" #include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/fluid/platform/bfloat16.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/float16.h" #include "pybind11/numpy.h" @@ -104,6 +105,7 @@ struct ValidDTypeToPyArrayChecker { } DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::float16); +DECLARE_VALID_DTYPE_TO_PY_ARRAY(platform::bfloat16); DECLARE_VALID_DTYPE_TO_PY_ARRAY(float); DECLARE_VALID_DTYPE_TO_PY_ARRAY(double); DECLARE_VALID_DTYPE_TO_PY_ARRAY(bool); @@ -119,6 +121,9 @@ inline std::string TensorDTypeToPyDTypeStr( if (type == proto_type) { \ if (std::is_same::value) { \ return "e"; \ + } else if (std::is_same::value) { \ + /* NumPy character code of uint16 due to no support for bfloat16 */ \ + return "H"; \ } else { \ constexpr auto kIsValidDType = ValidDTypeToPyArrayChecker::kValue; \ PADDLE_ENFORCE_EQ( \ @@ -262,10 +267,10 @@ void SetTensorFromPyArray(framework::Tensor *self, const py::object &obj, SetTensorFromPyArrayT(self, array, place, zero_copy); } else if (py::isinstance>(array)) { - // TODO(cql): temporary keeping uint16, which is used for casting float16 - // before. It should be depracated later. 
- SetTensorFromPyArrayT(self, array, place, - zero_copy); + // since there is still no support for bfloat16 in NumPy, + // uint16 is used for casting bfloat16 + SetTensorFromPyArrayT(self, array, place, + zero_copy); } else if (py::isinstance>(array)) { SetTensorFromPyArrayT(self, array, place, zero_copy); } else { @@ -479,6 +484,8 @@ inline framework::Tensor *_sliceTensor(const framework::Tensor &self, switch (src_type) { case framework::proto::VarType::FP16: return _sliceAndConcat(self, obj, dim); + case framework::proto::VarType::BF16: + return _sliceAndConcat(self, obj, dim); case framework::proto::VarType::FP32: return _sliceAndConcat(self, obj, dim); case framework::proto::VarType::FP64: diff --git a/paddle/fluid/train/CMakeLists.txt b/paddle/fluid/train/CMakeLists.txt index 1f88eb2109aa23b6b60104451908b0a70c41c898..7eab677fac1683fdc95c9e338b1099d78b5cabc3 100644 --- a/paddle/fluid/train/CMakeLists.txt +++ b/paddle/fluid/train/CMakeLists.txt @@ -29,8 +29,10 @@ function(train_test TARGET_NAME) PROPERTIES DEPENDS test_${TARGET_NAME}) set_tests_properties(test_train_${TARGET_NAME}${arg} PROPERTIES LABELS "RUN_TYPE=DIST") - set_tests_properties(test_train_${TARGET_NAME}${arg} - PROPERTIES TIMEOUT 150) + if(NOT WIN32 AND NOT APPLE) + set_tests_properties(test_train_${TARGET_NAME}${arg} + PROPERTIES TIMEOUT 150) + endif() endforeach() endfunction(train_test) diff --git a/paddle/http.log b/paddle/http.log new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/paddle/scripts/paddle_build.bat b/paddle/scripts/paddle_build.bat index cfb59a04f8147f5c09aa08a01bcd304bf8ccc120..f9ec40c1830655d2ccfe1b71270e94341e875fc5 100644 --- a/paddle/scripts/paddle_build.bat +++ b/paddle/scripts/paddle_build.bat @@ -20,13 +20,12 @@ rem Paddle CI Task On Windows Platform rem ================================================= rem -------clean up environment----------- +wmic process where name="op_function_generator.exe" call terminate 2>NUL set work_dir=%cd% -if exist build rmdir build /s/q mkdir build cd /d build tree . dir paddle\fluid\pybind\Release -taskkill /f /im op_function_generator.exe 2>NUL rem ------initialize the virtual environment------ if not defined PYTHON_ROOT set PYTHON_ROOT=C:\Python37 @@ -216,7 +215,7 @@ pip install -U %PADDLE_WHL_FILE_WIN% --user if %ERRORLEVEL% NEQ 0 ( call paddle_winci\Scripts\deactivate.bat 2>NUL echo pip install whl package failed! - exit /b 3 + exit /b 1 ) python %work_dir%\paddle\scripts\installation_validate.py @@ -225,7 +224,7 @@ goto:eof :test_whl_pacakage_error call paddle_winci\Scripts\deactivate.bat 2>NUL echo Test import paddle failed, will exit! -exit /b 3 +exit /b 1 rem --------------------------------------------------------------------------------------------- :unit_test @@ -248,6 +247,9 @@ goto:eof :unit_test_error call paddle_winci\Scripts\deactivate.bat 2>NUL +for /F %%# in ('wmic os get localdatetime^|findstr 20') do set end=%%# +set end=%end:~4,10% +call :timestamp "%start%" "%end%" "TestCases Total" echo Running unit tests failed, will exit! exit /b 8 @@ -268,7 +270,7 @@ goto:eof :test_inference_error call paddle_winci\Scripts\deactivate.bat 2>NUL echo Testing fluid library for inference failed! 
-exit /b 5 +exit /b 1 rem --------------------------------------------------------------------------------------------- :check_change_of_unittest @@ -399,7 +401,7 @@ taskkill /f /im git-remote-https.exe 2>NUL taskkill /f /im vctip.exe 2>NUL taskkill /f /im cvtres.exe 2>NUL taskkill /f /im rc.exe 2>NUL -taskkill /f /im op_function_generator.exe 2>NUL +wmic process where name="op_function_generator.exe" call terminate 2>NUL taskkill /f /im python.exe 2>NUL call paddle_winci\Scripts\deactivate.bat 2>NUL taskkill /f /im python.exe 2>NUL diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 926747ef6186e3b9439baf787572fe9d1988fb46..9c1c95f37ed0785bfd770e7cbc02002daba8447b 100755 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -273,7 +273,7 @@ function cmake_gen() { function abort(){ echo "Your change doesn't follow PaddlePaddle's code style." 1>&2 echo "Please use pre-commit to check what is wrong." 1>&2 - exit 1 + exit 4 } function check_style() { @@ -303,7 +303,7 @@ function check_style() { if [ $commit_files == 'off' ];then echo "code format error" - exit 1 + exit 4 fi trap : 0 } @@ -528,6 +528,7 @@ EOF elif [ "$1" == "cp37-cp37m" ]; then pip3.7 install --user ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl fi + set +e ut_startTime_s=`date +%s` ctest --output-on-failure -j $2;mactest_error=$? ut_endTime_s=`date +%s` @@ -959,7 +960,7 @@ set +x retry_unittests_record="$retry_unittests_record$failed_test_lists" failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'` - read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(\w+\)" | sed 's/(.\+)//' | sed 's/- //' ) + read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) echo "=========================================" echo "This is the ${exec_time_array[$exec_times]} time to re-run" echo "=========================================" @@ -1395,24 +1396,49 @@ function example() { fi } +function summary_check_problems() { + set +x + local check_style_code=$1 + local example_code=$2 + if [ $check_style_code -ne 0 -o $example_code -ne 0 ];then + echo "========================================" + echo "summary problems:" + echo "========================================" + if [ $check_style_code -ne 0 ];then + echo "- Check code style failed! Please check the log and fix problems." + fi + if [ $example_code -ne 0 ];then + echo "- Check example code failed! Please check the log and fix problems." + fi + [ $check_style_code -ne 0 ] && exit $check_style_code + [ $example_code -ne 0 ] && exit $example_code + fi + set -x +} + function main() { local CMD=$1 local parallel_number=$2 init if [ "$CMD" != "assert_file_approvals" ];then python ${PADDLE_ROOT}/tools/summary_env.py + bash ${PADDLE_ROOT}/tools/get_cpu_info.sh fi case $CMD in build_only) cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} ;; build_and_check) - check_style + set +e + $(check_style >&2) + check_style_code=$? generate_upstream_develop_api_spec ${PYTHON_ABI:-""} ${parallel_number} cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number} check_sequence_op_unittest generate_api_spec ${PYTHON_ABI:-""} "PR" - example + $(example >&2) + example_code=$? 
+ summary_check_problems $check_style_code $example_code assert_api_spec_approvals ;; build) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index f98c8f5ee6643f09fcfde21b24b5b9bea510129b..d5793eb424ab794e3e8af8ef2312aac927c272e5 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -217,6 +217,8 @@ from .tensor.search import index_select #DEFINE_ALIAS from .tensor.search import nonzero #DEFINE_ALIAS from .tensor.search import sort #DEFINE_ALIAS from .framework.random import manual_seed #DEFINE_ALIAS +from .framework.random import get_cuda_rng_state #DEFINE_ALIAS +from .framework.random import set_cuda_rng_state #DEFINE_ALIAS from .framework import Variable #DEFINE_ALIAS from .framework import ParamAttr #DEFINE_ALIAS from .framework import create_global_var #DEFINE_ALIAS @@ -230,6 +232,7 @@ from .framework import grad #DEFINE_ALIAS from .framework import no_grad #DEFINE_ALIAS from .framework import save #DEFINE_ALIAS from .framework import load #DEFINE_ALIAS +from .framework import SaveLoadConfig #DEFINE_ALIAS from .framework import DataParallel #DEFINE_ALIAS from .framework import NoamDecay #DEFINE_ALIAS @@ -259,7 +262,7 @@ from .device import get_device from .fluid.dygraph.base import enable_dygraph as disable_static #DEFINE_ALIAS from .fluid.dygraph.base import disable_dygraph as enable_static #DEFINE_ALIAS from .fluid.framework import in_dygraph_mode as in_dynamic_mode #DEFINE_ALIAS -from .fluid.dygraph.base import no_grad #DEFINE_ALIAS +from .fluid.dygraph.base import no_grad_ as no_grad #DEFINE_ALIAS from . import jit from . import static @@ -267,5 +270,6 @@ from . import static # high-level api from .hapi import Model from .hapi import callbacks +from .hapi import summary import paddle.text import paddle.vision diff --git a/python/paddle/distributed/collective.py b/python/paddle/distributed/collective.py index c40ae7179395a2fc16ece0d68546221ce53c2180..19df0ca91e103a0865f648daa5c173c2691307de 100644 --- a/python/paddle/distributed/collective.py +++ b/python/paddle/distributed/collective.py @@ -73,20 +73,21 @@ def broadcast(tensor, src, group=0): Examples: .. code-block:: python - import paddle - import paddle.prepare_context as prepare_context - - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - if paddle.ParallelEnv().local_rank == 0: - np_data = np.array([[4, 5, 6], [4, 5, 6]]) - else: - np_data = np.array([[1, 2, 3], [1, 2, 3]]) - data = paddle.to_tensor(np_data) - paddle.distributed.broadcast(data, 1) - out = data.numpy() - # [[1, 2, 3], [1, 2, 3]] + import numpy as np + import paddle + from paddle.distributed import init_parallel_env + + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + if paddle.distributed.ParallelEnv().local_rank == 0: + np_data = np.array([[4, 5, 6], [4, 5, 6]]) + else: + np_data = np.array([[1, 2, 3], [1, 2, 3]]) + data = paddle.to_tensor(np_data) + paddle.distributed.broadcast(data, 1) + out = data.numpy() + # [[1, 2, 3], [1, 2, 3]] """ if in_dygraph_mode(): return core.ops.c_broadcast(tensor, tensor, 'root', src, @@ -129,21 +130,22 @@ def all_reduce(tensor, op=ReduceOp.SUM, group=0): Examples: .. 
code-block:: python - import paddle - from paddle.distributed import ReduceOp - import paddle.prepare_context as prepare_context - - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - if paddle.ParallelEnv().local_rank == 0: - np_data = np.array([[4, 5, 6], [4, 5, 6]]) - else: - np_data = np.array([[1, 2, 3], [1, 2, 3]]) - data = paddle.to_tensor(np_data) - paddle.distributed.all_reduce(data) - out = data.numpy() - # [[5, 7, 9], [5, 7, 9]] + import numpy as np + import paddle + from paddle.distributed import ReduceOp + from paddle.distributed import init_parallel_env + + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + if paddle.distributed.ParallelEnv().local_rank == 0: + np_data = np.array([[4, 5, 6], [4, 5, 6]]) + else: + np_data = np.array([[1, 2, 3], [1, 2, 3]]) + data = paddle.to_tensor(np_data) + paddle.distributed.all_reduce(data) + out = data.numpy() + # [[5, 7, 9], [5, 7, 9]] """ if in_dygraph_mode(): if op == ReduceOp.SUM: @@ -204,20 +206,21 @@ def reduce(tensor, dst, op=ReduceOp.SUM, group=0): Examples: .. code-block:: python - import paddle - import paddle.prepare_context as prepare_context - - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - if paddle.ParallelEnv().local_rank == 0: - np_data = np.array([[4, 5, 6], [4, 5, 6]]) - else: - np_data = np.array([[1, 2, 3], [1, 2, 3]]) - data = paddle.to_tensor(np_data) - paddle.distributed.reduce(data, 0) - out = data.numpy() - # [[5, 7, 9], [5, 7, 9]] + import numpy as np + import paddle + from paddle.distributed import init_parallel_env + + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + if paddle.distributed.ParallelEnv().local_rank == 0: + np_data = np.array([[4, 5, 6], [4, 5, 6]]) + else: + np_data = np.array([[1, 2, 3], [1, 2, 3]]) + data = paddle.to_tensor(np_data) + paddle.distributed.reduce(data, 0) + out = data.numpy() + # [[5, 7, 9], [5, 7, 9]] """ if in_dygraph_mode(): if op == ReduceOp.SUM: @@ -286,25 +289,26 @@ def all_gather(tensor_list, tensor, group=0): Examples: .. 
code-block:: python - import paddle - import paddle.prepare_context as prepare_context - - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - tensor_list = [] - if paddle.ParallelEnv().local_rank == 0: - np_data1 = np.array([[4, 5, 6], [4, 5, 6]]) - np_data2 = np.array([[4, 5, 6], [4, 5, 6]]) - data1 = paddle.to_tensor(np_data1) - data2 = paddle.to_tensor(np_data2) - paddle.distributed.all_gather(tensor_list, data1) - else: - np_data1 = np.array([[1, 2, 3], [1, 2, 3]]) - np_data2 = np.array([[1, 2, 3], [1, 2, 3]]) - data1 = paddle.to_tensor(np_data1) - data2 = paddle.to_tensor(np_data2) - out = paddle.distributed.all_gather(tensor_list, data2) + import numpy as np + import paddle + from paddle.distributed import init_parallel_env + + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + tensor_list = [] + if paddle.distributed.ParallelEnv().local_rank == 0: + np_data1 = np.array([[4, 5, 6], [4, 5, 6]]) + np_data2 = np.array([[4, 5, 6], [4, 5, 6]]) + data1 = paddle.to_tensor(np_data1) + data2 = paddle.to_tensor(np_data2) + paddle.distributed.all_gather(tensor_list, data1) + else: + np_data1 = np.array([[1, 2, 3], [1, 2, 3]]) + np_data2 = np.array([[1, 2, 3], [1, 2, 3]]) + data1 = paddle.to_tensor(np_data1) + data2 = paddle.to_tensor(np_data2) + paddle.distributed.all_gather(tensor_list, data2) """ op_type = 'c_allgather' helper = LayerHelper(op_type, **locals()) @@ -359,25 +363,26 @@ def scatter(tensor, tensor_list=None, src=0, group=0): Examples: .. code-block:: python - import paddle - import paddle.prepare_context as prepare_context - - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - if paddle.ParallelEnv().local_rank == 0: - np_data1 = np.array([7, 8, 9]) - np_data2 = np.array([10, 11, 12]) - else: - np_data1 = np.array([1, 2, 3]) - np_data2 = np.array([4, 5, 6]) - data1 = paddle.to_tensor(np_data1) - data2 = paddle.to_tensor(np_data2) - if paddle.ParallelEnv().local_rank == 0: - paddle.distributed.scatter(data1, src=1) - else: - paddle.distributed.scatter(data1, tensor_list=[data1, data2], src=1) - out = data1.numpy() + import numpy as np + import paddle + from paddle.distributed import init_parallel_env + + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + if paddle.distributed.ParallelEnv().local_rank == 0: + np_data1 = np.array([7, 8, 9]) + np_data2 = np.array([10, 11, 12]) + else: + np_data1 = np.array([1, 2, 3]) + np_data2 = np.array([4, 5, 6]) + data1 = paddle.to_tensor(np_data1) + data2 = paddle.to_tensor(np_data2) + if paddle.distributed.ParallelEnv().local_rank == 0: + paddle.distributed.scatter(data1, src=1) + else: + paddle.distributed.scatter(data1, tensor_list=[data1, data2], src=1) + out = data1.numpy() """ op_type = 'c_scatter' global _default_group @@ -425,13 +430,13 @@ def barrier(group=0): Examples: .. 
code-block:: python - import paddle - import paddle.prepare_context as prepare_context + import paddle + from paddle.distributed import init_parallel_env - paddle.disable_static() - paddle.set_device('gpu:%d'%paddle.ParallelEnv().dev_id) - prepare_context() - paddle.distributed.barrier() + paddle.disable_static() + paddle.set_device('gpu:%d'%paddle.distributed.ParallelEnv().dev_id) + init_parallel_env() + paddle.distributed.barrier() """ op_type = 'barrier' temp = paddle.fill_constant([1], dtype="int32", value="1") diff --git a/python/paddle/distributed/fleet/__init__.py b/python/paddle/distributed/fleet/__init__.py index 42ac68ba1a64de54f029878ceab08435c924d087..5f0cf9f93d62eba9b81e8a834b52f84122f2702d 100644 --- a/python/paddle/distributed/fleet/__init__.py +++ b/python/paddle/distributed/fleet/__init__.py @@ -50,3 +50,10 @@ distributed_optimizer = fleet.distributed_optimizer save_inference_model = fleet.save_inference_model save_persistables = fleet.save_persistables minimize = fleet.minimize +distributed_model = fleet.distributed_model +step = fleet.step +clear_grad = fleet.clear_grad +set_lr = fleet.set_lr +get_lr = fleet.get_lr +state_dict = fleet.state_dict +set_state_dict = fleet.set_state_dict diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index 26063d1b8a9225aff63628bb37f433ec95257dc7..9c1793fd5b56eb728ae7d16840cf4fb09cf975c8 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -118,7 +118,7 @@ class DistributedStrategy(object): strategy = fleet.DistributedStrategy() strategy.dgc = True strategy.recompute = True - strategy.recompute_configs = {"checkpoint": ["x"]} + strategy.recompute_configs = {"checkpoints": ["x"]} strategy.save_to_prototxt("dist_strategy.prototxt") """ with open(output, "w") as fout: @@ -133,7 +133,7 @@ class DistributedStrategy(object): import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() - strategy.load_from_prototxt("dist_strategy.protoxt") + strategy.load_from_prototxt("dist_strategy.prototxt") """ with open(pb_file, 'r') as f: self.strategy = google.protobuf.text_format.Merge( @@ -147,6 +147,7 @@ class DistributedStrategy(object): Examples: .. code-block:: python + import paddle exe_strategy = paddle.fluid.ExecutionStrategy() exe_strategy.num_threads = 10 exe_strategy.num_iteration_per_drop_scope = 10 @@ -179,6 +180,7 @@ class DistributedStrategy(object): Examples: .. code-block:: python + import paddle build_strategy = paddle.fluid.BuildStrategy() build_strategy.enable_sequential_execution = True build_strategy.fuse_elewise_add_act_ops = True @@ -252,14 +254,19 @@ class DistributedStrategy(object): a dict. 
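The corrected `save_to_prototxt`/`load_from_prototxt` docstrings above are each one half of a round trip. A minimal combined sketch (the file name is illustrative, and a Paddle build with `paddle.distributed.fleet` is assumed):

.. code-block:: python

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    strategy.recompute = True
    strategy.recompute_configs = {"checkpoints": ["x"]}
    strategy.save_to_prototxt("dist_strategy.prototxt")

    # reload the serialized strategy and check that one of the switches survived
    restored = fleet.DistributedStrategy()
    restored.load_from_prototxt("dist_strategy.prototxt")
    assert restored.recompute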
**Notes**: - **Detailed arguments for a_sync_configs** - **k_step**: number of local optimization updates before communication - **max_merge_var_num**: maximum number of merged gradients before communication - **send_queue_size**: a buffer size of worker communication - **independent_recv_thread**: if we are using independent recv thread for communication - **thread_pool_size**: number of thread pool - **send_wait_times**: waiting time for sending gradients - **runtime_split_send_recv**: if we are using Tensor split for send and recv during runtime + k_step(int): number of local optimization updates before communication + + max_merge_var_num(int): maximum number of merged gradients before communication + + send_queue_size(int): a buffer size of worker communication + + independent_recv_thread(bool): if we are using independent recv thread for communication + + thread_pool_size(int): number of thread pool + + send_wait_times(int): waiting time for sending gradients + + runtime_split_send_recv(bool): if we are using Tensor split for send and recv during runtime Examples: .. code-block:: python @@ -270,11 +277,12 @@ class DistributedStrategy(object): strategy = fleet.DistributedStrategy() strategy.a_sync = True # by default this is True - configs = {"k_step": 10000, "send_queue_size": 32} + configs = {"k_steps": 1024, "send_queue_size": 32} strategy.a_sync_configs = configs # code block for defining loss and local optimizer # sgd = fleet.distributed_optimizer(optimizer, strategy) + """ return get_msg_dict(self.strategy.a_sync_configs) @@ -314,14 +322,21 @@ class DistributedStrategy(object): settings that can be configured through a dict. **Notes**: - **init_loss_scaling(float)**: The initial loss scaling factor. Default 32768. - **use_dynamic_loss_scaling(bool)**: Whether to use dynamic loss scaling. Default True. - **incr_every_n_steps(int)**: Increases loss scaling every n consecutive steps with finite gradients. Default 1000. - **decr_every_n_nan_or_inf(int)**: Decreases loss scaling every n accumulated steps with nan or inf gradients. Default 2. - **incr_ratio(float)**: The multiplier to use when increasing the loss scaling. Default 2.0. - **decr_ratio(float)**: The less-than-one-multiplier to use when decreasing the loss scaling. Default 0.5. - **custom_white_list(list[str])**: Users' custom white list which always execution fp16. - **custom_black_list(list[str])**: Users' custom black list which forbidden execution fp16. + init_loss_scaling(float): The initial loss scaling factor. Default 32768. + + use_dynamic_loss_scaling(bool): Whether to use dynamic loss scaling. Default True. + + incr_every_n_steps(int): Increases loss scaling every n consecutive steps with finite gradients. Default 1000. + + decr_every_n_nan_or_inf(int): Decreases loss scaling every n accumulated steps with nan or inf gradients. Default 2. + + incr_ratio(float): The multiplier to use when increasing the loss scaling. Default 2.0. + + decr_ratio(float): The less-than-one-multiplier to use when decreasing the loss scaling. Default 0.5. + + custom_white_list(list[str]): Users' custom white list which always execution fp16. + + custom_black_list(list[str]): Users' custom black list which forbidden execution fp16. Examples: .. 
code-block:: python @@ -553,7 +568,7 @@ class DistributedStrategy(object): import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() strategy.recompute = True - strategy.recompute_configs = {"checkpionts": ["x", "y"]} + strategy.recompute_configs = {"checkpoints": ["x", "y"]} """ return get_msg_dict(self.strategy.recompute_configs) @@ -603,6 +618,7 @@ class DistributedStrategy(object): **Notes**: **Detailed arguments for pipeline_configs** + **micro_batch**: the number of small batches in each user defined batch Examples: @@ -626,10 +642,10 @@ class DistributedStrategy(object): @property def localsgd(self): """ - Indicating whether we are using Local SGD training. For more details, please refer to - [Don't Use Large Mini-Batches, Use Local SGD](https://arxiv.org/pdf/1808.07217.pdf), + Indicating whether we are using Local SGD training. Default Value: False + For more details, please refer to + `Don't Use Large Mini-Batches, Use Local SGD `_. - Default Value: False Examples: .. code-block:: python @@ -655,13 +671,12 @@ class DistributedStrategy(object): setting that can be configured through a dict. **Notes**: - **k_steps(int)**: The local steps for training before parameter - synchronization. Default 1. If strategy.auto is set True, the - local steps will be calculated automatically during training. - The algorithm is referenced in this paper: - [Adaptive Communication Strategies to Achieve the Best Error-Runtime Trade-off in Local-Update SGD](https://arxiv.org/pdf/1810.08313.pdf). - In this case, k_steps indicates the first local steps which - is suggested setting to 1. + k_steps(int) The local steps for training before parameter synchronization. Default 1. + + If strategy.auto is set True, the local steps will be calculated automatically during training. + The algorithm is referenced in this paper: + `Adaptive Communication Strategies to Achieve the Best Error-Runtime Trade-off in Local-Update SGD `_. + In this case, k_steps indicates the first local steps which is suggested setting to 1. Examples: .. code-block:: python @@ -712,14 +727,16 @@ class DistributedStrategy(object): settings that can be configured through a dict. **Notes**: - **rampup_begin_step(int)**: The beginning step from which gradient compression is implemented. Default 0. - **rampup_step(int)**: Time steps used in sparsity warm-up periods. Default is 1. - For example, if the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 100, - it will use 0.75 at 0~19 steps, and 0.9375 at 20~39 steps, and so on. And when reach sparsity array - ends, it will use 0.999 then and after. - **sparsity(list[float])**: Get top important element from gradient tensor, the ratio is (1 - sparsity). - Default is [0.999]. For example, if the sparsity is [0.99, 0.999], the top [1%, 0.1%] important - element will be transmitted. + rampup_begin_step(int): The beginning step from which gradient compression is implemented. Default 0. + + rampup_step(int): Time steps used in sparsity warm-up periods. Default is 1. \ + For example, if the sparsity is [0.75, 0.9375, 0.984375, 0.996, 0.999], and the rampup_step is 100, \ + it will use 0.75 at 0~19 steps, and 0.9375 at 20~39 steps, and so on. And when reach sparsity array \ + ends, it will use 0.999 then and after. + + sparsity(list[float]): Get top important element from gradient tensor, the ratio is (1 - sparsity). \ + Default is [0.999]. For example, if the sparsity is [0.99, 0.999], the top [1%, 0.1%] important \ + element will be transmitted. 
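A short sketch that gathers the `dgc_configs` keys documented above into one assignment (the numbers mirror the rampup example in the notes and are illustrative, not defaults):

.. code-block:: python

    import paddle.distributed.fleet as fleet

    strategy = fleet.DistributedStrategy()
    strategy.dgc = True
    # warm the sparsity up over the first 100 steps, then keep only the
    # top 0.1% of gradient elements (sparsity 0.999) afterwards
    strategy.dgc_configs = {
        "rampup_begin_step": 0,
        "rampup_step": 100,
        "sparsity": [0.75, 0.9375, 0.984375, 0.996, 0.999],
    }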
Examples: .. code-block:: python @@ -749,7 +766,8 @@ class DistributedStrategy(object): to model parameters. Examples: - .. code-block:: python + .. code-block:: python + import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() strategy.gradient_merge = True @@ -768,11 +786,15 @@ class DistributedStrategy(object): def gradient_merge_configs(self): """ the key-value configs of distribute_strategy - Keys: - k_steps (int): the update period of the parameters - avg (bool): whether to average the gradients of each mini-batch, - the default value is `True` - Example: + + **Note**: + k_steps(int): the update period of the parameters. + + avg(bool): whether to average the gradients of each mini-batch, the default value is `True` + + Examples: + .. code-block:: python + import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() strategy.gradient_merge = True @@ -826,6 +848,7 @@ class DistributedStrategy(object): Examples: .. code-block:: python + import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() strategy.lars = True @@ -882,6 +905,7 @@ class DistributedStrategy(object): Examples: .. code-block:: python + import paddle.distributed.fleet as fleet strategy = fleet.DistributedStrategy() strategy.lamb = True diff --git a/python/paddle/distributed/fleet/base/fleet_base.py b/python/paddle/distributed/fleet/base/fleet_base.py index 282ac29d6f9dafb4eb3b83471157464620326348..8c748060e630079af362759b1e4c1c0b09d58063 100644 --- a/python/paddle/distributed/fleet/base/fleet_base.py +++ b/python/paddle/distributed/fleet/base/fleet_base.py @@ -13,7 +13,10 @@ # limitations under the License. from __future__ import print_function +import warnings import paddle +from paddle.fluid.framework import dygraph_only +from paddle.fluid import compiler from .role_maker import UserDefinedRoleMaker, PaddleCloudRoleMaker, RoleMakerBase from .strategy_compiler import StrategyCompiler from .distributed_strategy import DistributedStrategy @@ -21,6 +24,7 @@ from .meta_optimizer_factory import MetaOptimizerFactory from .runtime_factory import RuntimeFactory from .util_factory import UtilFactory from paddle.fluid.wrapped_decorator import wrap_decorator +from paddle.fluid.dygraph import parallel_helper def _inited_runtime_handler_(func): @@ -35,7 +39,24 @@ def _inited_runtime_handler_(func): return __impl__ +def _is_non_distributed_check_(func): + def __impl__(*args, **kwargs): + cls = args[0] + + if cls._role_maker is not None and cls._role_maker._is_non_distributed( + ) is True: + warnings.warn( + "%s() function doesn't work when use non_distributed fleet." % + (func.__name__)) + return + + return func(*args, **kwargs) + + return __impl__ + + inited_runtime_handler = wrap_decorator(_inited_runtime_handler_) +is_non_distributed_check = wrap_decorator(_is_non_distributed_check_) class Fleet(object): @@ -159,6 +180,12 @@ class Fleet(object): "`role_maker` should be subclass of `RoleMakerBase`, but got {}". 
format(type(role_maker))) self.strategy_compiler = StrategyCompiler() + if paddle.fluid.framework.in_dygraph_mode(): + if parallel_helper._is_parallel_ctx_initialized(): + warnings.warn( + "The dygraph parallel environment has been initialized.") + else: + paddle.distributed.init_parallel_env() return None def is_first_worker(self): @@ -367,6 +394,7 @@ class Fleet(object): """ self._role_maker.barrier_worker() + @is_non_distributed_check @inited_runtime_handler def init_worker(self): """ @@ -391,6 +419,7 @@ class Fleet(object): """ self._runtime_handle._init_worker() + @is_non_distributed_check @inited_runtime_handler def init_server(self, *args, **kwargs): """ @@ -416,6 +445,7 @@ class Fleet(object): """ self._runtime_handle._init_server(*args, **kwargs) + @is_non_distributed_check @inited_runtime_handler def run_server(self): """ @@ -440,6 +470,7 @@ class Fleet(object): """ self._runtime_handle._run_server() + @is_non_distributed_check @inited_runtime_handler def stop_worker(self): """ @@ -564,12 +595,344 @@ class Fleet(object): """ self.user_defined_optimizer = optimizer + if paddle.fluid.framework.in_dygraph_mode(): + return self + if strategy == None: strategy = DistributedStrategy() self.user_defined_strategy = strategy self.valid_strategy = None return self + @dygraph_only + def distributed_model(self, model): + """ + Return dygraph distributed data parallel model (Layer) + Only work in dygraph mode + + Examples: + .. code-block:: python + import paddle + import paddle.nn as nn + from paddle.distributed import fleet + + class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear1 = nn.Linear(10, 10) + self._linear2 = nn.Linear(10, 1) + + def forward(self, x): + return self._linear2(self._linear1(x)) + + def train(): + # 1. enable dynamic mode + paddle.disable_static() + + # 2. initialize fleet environment + fleet.init(is_collective=True) + + # 3. create layer & optimizer + layer = LinearNet() + loss_fn = nn.MSELoss() + adam = paddle.optimizer.Adam( + learning_rate=0.001, parameters=layer.parameters()) + + # 4. get data_parallel model using fleet + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + + # 5. run layer + inputs = paddle.randn([10, 10], 'float32') + outputs = dp_layer(inputs) + labels = paddle.randn([10, 1], 'float32') + loss = loss_fn(outputs, labels) + + print("loss:", loss.numpy()) + + loss = dp_layer.scale_loss(loss) + loss.backward() + dp_layer.apply_collective_grads() + + adam.step() + adam.clear_grad() + + if __name__ == '__main__': + paddle.distributed.spawn(train) + """ + assert model is not None + self.model = paddle.DataParallel(model) + return self.model + + @dygraph_only + def state_dict(self): + """ + Get state dict information from optimizer. + Only work in dygraph mode + + Returns: + state_dict(dict) : dict contains all the Tensor used by optimizer + + Examples: + .. 
code-block:: python + import numpy as np + import paddle + from paddle.distributed import fleet + + paddle.disable_static() + fleet.init(is_collective=True) + + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.fluid.dygraph.to_variable(value) + + layer = paddle.nn.Linear(13, 5) + adam = paddle.optimizer.Adam(learning_rate=0.01, parameters=layer.parameters()) + + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + state_dict = adam.state_dict() + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.state_dict() + + @dygraph_only + def set_state_dict(self, state_dict): + """ + Load optimizer state dict. + Only work in dygraph mode + + Args: + state_dict(dict) : Dict contains all the Tensor needed by optimizer + + Returns: None + + Examples: + .. code-block:: python + import numpy as np + import paddle + from paddle.distributed import fleet + + paddle.disable_static() + fleet.init(is_collective=True) + + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.fluid.dygraph.to_variable(value) + + layer = paddle.nn.Linear(13, 5) + adam = paddle.optimizer.Adam(learning_rate=0.01, parameters=layer.parameters()) + + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + state_dict = adam.state_dict() + paddle.framework.save(state_dict, "paddle_dy") + para_state_dict, opti_state_dict = paddle.framework.load( "paddle_dy") + adam.set_state_dict(opti_state_dict) + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.set_state_dict(state_dict) + + @dygraph_only + def set_lr(self, value): + """ + Set the value of the learning rate manually in the optimizer. + Only work in dygraph mode + + Args: + value (float|Tensor): the value of learning rate + + Returns: None + + Examples: + .. code-block:: python + import numpy as np + import paddle + from paddle.distributed import fleet + + paddle.disable_static() + fleet.init(is_collective=True) + + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.fluid.dygraph.to_variable(value) + + layer = paddle.nn.Linear(13, 5) + adam = paddle.optimizer.Adam(learning_rate=0.01, parameters=layer.parameters()) + + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + + lr_list = [0.2, 0.3, 0.4, 0.5, 0.6] + for i in range(5): + adam.set_lr(lr_list[i]) + lr = adam.get_lr() + print("current lr is {}".format(lr)) + # Print: + # current lr is 0.2 + # current lr is 0.3 + # current lr is 0.4 + # current lr is 0.5 + # current lr is 0.6 + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.set_lr(value) + + @dygraph_only + def get_lr(self): + """ + Get current step learning rate. + Only work in dygraph mode + + Returns: + float: The learning rate of the current step. + + Examples: + .. code-block:: python + import numpy as np + import paddle + from paddle.distributed import fleet + + paddle.disable_static() + fleet.init(is_collective=True) + + value = np.arange(26).reshape(2, 13).astype("float32") + a = paddle.fluid.dygraph.to_variable(value) + + layer = paddle.nn.Linear(13, 5) + adam = paddle.optimizer.Adam(learning_rate=0.01, parameters=layer.parameters()) + + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + + lr = adam.get_lr() + print(lr) # 0.01 + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.get_lr() + + @dygraph_only + def step(self): + """ + Execute the optimizer once. 
+ Only work in dygraph mode + + Returns: None + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + from paddle.distributed import fleet + + class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear1 = nn.Linear(10, 10) + self._linear2 = nn.Linear(10, 1) + + def forward(self, x): + return self._linear2(self._linear1(x)) + + def train(): + # 1. enable dynamic mode + paddle.disable_static() + + # 2. initialize fleet environment + fleet.init(is_collective=True) + + # 3. create layer & optimizer + layer = LinearNet() + loss_fn = nn.MSELoss() + adam = paddle.optimizer.Adam( + learning_rate=0.001, parameters=layer.parameters()) + + # 4. get data_parallel model using fleet + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + + # 5. run layer + inputs = paddle.randn([10, 10], 'float32') + outputs = dp_layer(inputs) + labels = paddle.randn([10, 1], 'float32') + loss = loss_fn(outputs, labels) + + print("loss:", loss.numpy()) + + loss = dp_layer.scale_loss(loss) + loss.backward() + dp_layer.apply_collective_grads() + + adam.step() + adam.clear_grad() + + if __name__ == '__main__': + paddle.distributed.spawn(train) + + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.step() + + @dygraph_only + def clear_grad(self): + """ + Execute the optimizer once. + Only work in dygraph mode + + Returns: None + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + from paddle.distributed import fleet + + class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear1 = nn.Linear(10, 10) + self._linear2 = nn.Linear(10, 1) + + def forward(self, x): + return self._linear2(self._linear1(x)) + + def train(): + # 1. enable dynamic mode + paddle.disable_static() + + # 2. initialize fleet environment + fleet.init(is_collective=True) + + # 3. create layer & optimizer + layer = LinearNet() + loss_fn = nn.MSELoss() + adam = paddle.optimizer.Adam( + learning_rate=0.001, parameters=layer.parameters()) + + # 4. get data_parallel model using fleet + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + + # 5. run layer + inputs = paddle.randn([10, 10], 'float32') + outputs = dp_layer(inputs) + labels = paddle.randn([10, 1], 'float32') + loss = loss_fn(outputs, labels) + + print("loss:", loss.numpy()) + + loss = dp_layer.scale_loss(loss) + loss.backward() + dp_layer.apply_collective_grads() + + adam.step() + adam.clear_grad() + + if __name__ == '__main__': + paddle.distributed.spawn(train) + """ + # imitate target optimizer retrieval + return self.user_defined_optimizer.clear_grad() + def minimize(self, loss, startup_program=None, @@ -593,8 +956,8 @@ class Fleet(object): tuple: tuple (optimize_ops, params_grads), A list of operators appended by minimize and a list of (param, grad) variable pairs, param is ``Parameter``, grad is the gradient value corresponding to the parameter. - The returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to - indicate program pruning. If so, the program will be pruned by ``feed`` and + The returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to + indicate program pruning. If so, the program will be pruned by ``feed`` and ``fetch_list`` before run, see details in ``Executor``. 
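The dygraph methods added above (`state_dict`, `set_state_dict`, `set_lr`, `get_lr`, `step`, `clear_grad`) all follow the same "imitate target optimizer retrieval" pattern: fleet keeps the user-defined optimizer and simply forwards the call. A pure-Python sketch of that delegation, using illustrative class names rather than Paddle's actual classes:

.. code-block:: python

    class _FleetLikeWrapper(object):
        def __init__(self, user_defined_optimizer):
            # fleet.distributed_optimizer() stores the user optimizer like this
            self.user_defined_optimizer = user_defined_optimizer

        def step(self):
            # imitate target optimizer retrieval
            return self.user_defined_optimizer.step()

        def clear_grad(self):
            return self.user_defined_optimizer.clear_grad()

    class _ToyOptimizer(object):
        def step(self):
            print("toy step")

        def clear_grad(self):
            print("toy clear_grad")

    wrapped = _FleetLikeWrapper(_ToyOptimizer())
    wrapped.step()        # prints "toy step"
    wrapped.clear_grad()  # prints "toy clear_grad"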
Examples: @@ -619,6 +982,11 @@ class Fleet(object): # for more examples, please reference https://github.com/PaddlePaddle/FleetX """ + if paddle.fluid.framework.in_dygraph_mode(): + # imitate target optimizer retrieval + target_opt = self.user_defined_optimizer + return target_opt.minimize(loss) + context = {} # cache original feed forward program self.origin_main_program = loss.block.program @@ -672,6 +1040,20 @@ class Fleet(object): optimize_ops = [] params_grads = [] + if self._role_maker._is_non_distributed() and not self._is_collective: + if self._runtime_handle is None: + self._runtime_handle = RuntimeFactory()._create_runtime(context) + + compiled_program = compiler.CompiledProgram( + self.origin_main_program).with_data_parallel( + loss_name=loss.name, share_vars_from=None) + loss.block.program._graph = compiled_program + return self.user_defined_optimizer.minimize( + loss, + startup_program=startup_program, + parameter_list=parameter_list, + no_grad_set=no_grad_set) + if meta_optimizer: optimize_ops, params_grads = meta_optimizer.minimize( loss, diff --git a/python/paddle/distributed/fleet/base/role_maker.py b/python/paddle/distributed/fleet/base/role_maker.py index 25f2d0dd3f45855d9f337c6b7154db9cb5bbae45..8614b1861343b8e48b55a8e75d9e432ef6329184 100644 --- a/python/paddle/distributed/fleet/base/role_maker.py +++ b/python/paddle/distributed/fleet/base/role_maker.py @@ -232,6 +232,8 @@ class PaddleCloudRoleMaker(RoleMakerBase): self._node_type_comm = None self._all_comm = None + self._non_distributed = False + if not self._is_collective: self._hdfs_name = kwargs.get("hdfs_name", "") self._hdfs_ugi = kwargs.get("hdfs_ugi", "") @@ -373,6 +375,15 @@ class PaddleCloudRoleMaker(RoleMakerBase): self.generate_role() return self._server_endpoints + def _is_non_distributed(self): + """ + Return True if indispensable environment for fleetrun is not found + (use python-run to launch fleet-code directly) + """ + if not self._role_is_generated: + self.generate_role() + return self._non_distributed + def _heter_worker_num(self): """ get heter worker nums @@ -409,13 +420,22 @@ class PaddleCloudRoleMaker(RoleMakerBase): try: # Environment variable PADDLE_PSERVERS_IP_PORT_LIST must be set # format: string(ip:port,ip:port), eg. 127.0.0.1:6001,127.0.0.1:6002 - self._server_endpoints = os.getenv("PADDLE_PSERVERS_IP_PORT_LIST", - "").split(",") - assert self._server_endpoints != "" + self._server_endpoints = os.getenv("PADDLE_PSERVERS_IP_PORT_LIST") self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS", "").split(",") - assert self._server_endpoints != "" - + if self._server_endpoints is None: + # back to non_distributed execution. + self._server_endpoints = "" + self._trainers_num = 1 + self._role = Role.WORKER + self._current_id = 0 + self._node_num = 1 + self._heter_trainers_num = 0 + self._heter_trainer_endpoints = None + self._non_distributed = True + return + + self._server_endpoints = self._server_endpoints.split(",") trainers_num = int(os.environ["PADDLE_TRAINERS_NUM"]) training_role = os.environ["TRAINING_ROLE"] @@ -488,7 +508,11 @@ class PaddleCloudRoleMaker(RoleMakerBase): assert (self._training_role == "TRAINER") self._worker_endpoints = os.getenv("PADDLE_TRAINER_ENDPOINTS") self._cur_endpoint = os.getenv("PADDLE_CURRENT_ENDPOINT") - assert self._worker_endpoints is not None, "can't find PADDLE_TRAINER_ENDPOINTS" + if self._worker_endpoints is None: + # back to non_distributed execution. 
+ self._worker_endpoints = "127.0.0.1:6170" + self._cur_endpoint = self._worker_endpoints + self._non_distributed = True self._worker_endpoints = self._worker_endpoints.split(",") self._trainers_num = len(self._worker_endpoints) self._node_num = len( diff --git a/python/paddle/distributed/fleet/launch.py b/python/paddle/distributed/fleet/launch.py index 29a1bda92f17443e6c38b070379481aaa419b1d4..7778acaf83b310cfa9a04059ce6d3be2d5326089 100644 --- a/python/paddle/distributed/fleet/launch.py +++ b/python/paddle/distributed/fleet/launch.py @@ -200,11 +200,11 @@ def launch_collective(args): start_port = os.environ.get('FLAGS_START_PORT') if cloud_utils.use_paddlecloud() and trainers_num != 1: cluster, pod = cloud_utils.get_cloud_cluster(args.ips, gpus, start_port) - logger.info("get cluster from cloud:{}".format(cluster)) + logger.debug("get cluster from cloud:{}".format(cluster)) else: # trainers_num = 1 or not use paddlecloud ips="a,b" cluster, pod = get_cluster_from_args(args, gpus) - logger.info("get cluster from args:{}".format(cluster)) + logger.debug("get cluster from args:{}".format(cluster)) procs = start_local_trainers( cluster, @@ -217,7 +217,8 @@ def launch_collective(args): alive = watch_local_trainers(procs, cluster.trainers_nranks()) if not alive: - logger.info("Local procs complete, POD info:{}".format(pod)) + logger.info("Local processes completed.") + logger.debug("POD info:{}".format(pod)) break time.sleep(3) @@ -313,18 +314,26 @@ def launch_ps(args): cmds = [] log_fns = [] for idx, cur_server in enumerate(pod.servers): - current_env.update({ + proc_env = { "PADDLE_PSERVERS_IP_PORT_LIST": server_endpoints, "PADDLE_PORT": cur_server.endpoint.split(":")[1], "TRAINING_ROLE": "PSERVER", "PADDLE_TRAINERS_NUM": str(worker_num), "POD_IP": cur_server.endpoint.split(":")[0] - }) + } + current_env.update(proc_env) cmd = [sys.executable, "-u", args.training_script ] + args.training_script_args cmds.append(cmd) + if idx == 0: + logger.info( + "Local server start {} processes. First process distributed " + "environment info (Only For Debug): {}".format( + len(pod.servers), + pretty_print_envs(proc_env, ("Distributed Envs", "Value")))) + if args.log_dir is not None: os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/serverlog.%d" % (args.log_dir, idx), "w") @@ -338,21 +347,32 @@ def launch_ps(args): tp.rank = cur_server.rank tp.local_rank = idx tp.log_fn = fn - tp.log_offset = 0 if fn else None + tp.log_offset = fn.tell() if fn else None tp.cmd = cmd procs.append(tp) for idx, cur_worker in enumerate(pod.workers): - current_env.update({ + proc_env = { "PADDLE_PSERVERS_IP_PORT_LIST": server_endpoints, + "PADDLE_TRAINER_ENDPOINTS": worker_endpoints, "PADDLE_TRAINERS_NUM": str(worker_num), "TRAINING_ROLE": "TRAINER", "PADDLE_TRAINER_ID": str(cur_worker.rank) - }) + } + current_env.update(proc_env) + cmd = [sys.executable, "-u", args.training_script ] + args.training_script_args cmds.append(cmd) + + if idx == 0: + logger.info( + "Local worker start {} processes. 
First process distributed " + "environment info (Only For Debug): {}".format( + len(pod.workers), + pretty_print_envs(proc_env, ("Distributed Envs", "Value")))) + if args.log_dir is not None: os.system("mkdir -p {}".format(args.log_dir)) fn = open("%s/workerlog.%d" % (args.log_dir, idx), "w") @@ -366,11 +386,14 @@ def launch_ps(args): tp.rank = cur_worker.rank tp.local_rank = idx tp.log_fn = fn - tp.log_offset = 0 if fn else None + tp.log_offset = fn.tell() if fn else None tp.cmd = cmd procs.append(tp) + logger.info( + "Please check servers and workers logs in {}/workerlog.* and {}/serverlog.*". + format(args.log_dir, args.log_dir)) # only wait worker to finish here for i, proc in enumerate(procs): if i < len(pod.servers): @@ -403,16 +426,16 @@ def launch(): cuda_device_num = fluid.core.get_cuda_device_count() if len(has_ps_args) > 0 or cuda_device_num == 0: logger.info( - "Run parameter-sever cpu mode. pserver args:{}, cuda count:{}". + "Run parameter-sever cpu mode. pserver arguments:{}, cuda count:{}". format(has_ps_args, cuda_device_num)) launch_ps(args) elif len(has_collective_args) > 0: - logger.info("Run collective gpu mode. gpu args:{}, cuda count:{}". + logger.info("Run collective gpu mode. gpu arguments:{}, cuda count:{}". format(has_collective_args, cuda_device_num)) launch_collective(args) else: logger.warning( - "Not found distinct args. Default use gpu collective mode") + "Not found distinct arguments. Default use gpu collective mode") launch_collective(args) diff --git a/python/paddle/distributed/fleet/launch_utils.py b/python/paddle/distributed/fleet/launch_utils.py index 350d8ae2b44db3e8f8e6b00d95c2b7a9ca91f88b..3da5aed8201ace6ccf9eed1ff322a7c6304de4a6 100644 --- a/python/paddle/distributed/fleet/launch_utils.py +++ b/python/paddle/distributed/fleet/launch_utils.py @@ -253,7 +253,8 @@ def terminate_local_procs(procs): for p in procs: if p.proc.poll() is None: p.proc.terminate() - p.log_fn.close() + if p.log_fn: + p.log_fn.close() logger.debug("terminate process id:{}".format(p.proc.pid)) #wait all process terminiated @@ -338,6 +339,45 @@ def get_ports(num, offset): return ports +def pretty_print_envs(envs, header=None): + spacing = 2 + max_k = 40 + max_v = 45 + + for k, v in envs.items(): + max_k = max(max_k, len(k)) + + h_format = "{{:^{}s}}{}{{:<{}s}}\n".format(max_k, " " * spacing, max_v) + l_format = "{{:<{}s}}{{}}{{:<{}s}}\n".format(max_k, max_v) + length = max_k + max_v + spacing + + border = "".join(["="] * length) + line = "".join(["-"] * length) + + draws = "" + draws += border + "\n" + + if header: + draws += h_format.format(header[0], header[1]) + else: + draws += h_format.format("fleetrun Distributed Envs", "Value") + + draws += line + "\n" + + for k, v in envs.items(): + if isinstance(v, str) and len(v) >= max_v: + str_v = "... " + v[-41:] + else: + str_v = v + + draws += l_format.format(k, " " * spacing, str(str_v)) + + draws += border + + _str = "\n{}\n".format(draws) + return _str + + class TrainerProc(object): def __init__(self): self.proc = None @@ -373,11 +413,19 @@ def start_local_trainers(cluster, current_env.update(proc_env) - logger.debug("trainer proc env:{}".format(current_env)) - cmd = [sys.executable, "-u", training_script] + training_script_args - logger.info("start trainer proc:{} env:{}".format(cmd, proc_env)) + logger.debug("start trainer proc{} env:{}".format(cmd, current_env)) + + if idx == 0: + logger.info("Local start {} processes. 
First process distributed " + "environment info (Only For Debug): {}".format( + len(pod.trainers), + pretty_print_envs(proc_env, ("Distributed Envs", + "Value")))) + logger.info( + "More details for debug about commands and environments are written in {}/run.sh". + format(log_dir)) fn = None if log_dir is not None: diff --git a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py index 1741f10ccb1c28bfe6abaa63e754568fa08e21ce..227f8f60210ee8a44ab9e87ed7b88337c79ac7f1 100644 --- a/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py +++ b/python/paddle/distributed/fleet/runtime/parameter_server_runtime.py @@ -154,15 +154,16 @@ class ParameterServerRuntime(RuntimeBase): kwargs["sparse_attrs"] = get_sparse_attrs() return kwargs - from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_lr_ops + from paddle.fluid.incubate.fleet.parameter_server.ir.public import _get_lr_ops, _has_global_step from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import \ SyncStrategy, GeoStrategy trainer_config = self.async_strategy.get_trainer_runtime_config() - lrs = _get_lr_ops(self.origin_main_program) - if len(lrs) > 0: + lrs = _has_global_step(_get_lr_ops(self.origin_main_program)) + + if lrs: kwargs = {"need_global_step": "1"} else: kwargs = {"need_global_step": "0"} @@ -201,6 +202,9 @@ class ParameterServerRuntime(RuntimeBase): if self.role_maker._get_heter_worker_device() == "GPU": gpu_id = int(os.getenv("FLAGS_selected_gpus", "0")) executor = Executor(fluid.CUDAPlace(gpu_id)) + elif self.role_maker._get_heter_worker_device() == "XPU": + xpu_id = int(os.getenv("FLAGS_selected_xpus", "0")) + executor = Executor(fluid.XPUPlace(xpu_id)) else: raise ValueError("Not Support Device {}".format( self.role_maker._get_heter_worker_device())) diff --git a/python/paddle/distribution.py b/python/paddle/distribution.py index 49e98805d24f3f8f5dc1cfcbf3ddc8d9fb835fde..918ebce07825139fabe8ddd4c1e266dd04eb7f6d 100644 --- a/python/paddle/distribution.py +++ b/python/paddle/distribution.py @@ -102,21 +102,24 @@ class Distribution(object): tmp = 0. for arg in args: - valid_arg = False - for cls in [float, list, np.ndarray, tensor.Variable]: - if isinstance(arg, cls): - valid_arg = True - break - assert valid_arg, "type of input args must be float, list, numpy.ndarray or Tensor." if isinstance(arg, float): - arg = np.zeros(1) + arg + arg = [arg] + if not isinstance(arg, (list, np.ndarray, tensor.Variable)): + raise TypeError( + "Type of input args must be float, list, numpy.ndarray or Tensor, but received type {}". + format(type(arg))) + arg_np = np.array(arg) arg_dtype = arg_np.dtype - if str(arg_dtype) not in ['float32']: - warnings.warn( - "data type of argument only support float32, your argument will be convert to float32." - ) + if str(arg_dtype) != 'float32': + if str(arg_dtype) != 'float64': + # "assign" op doesn't support float64. if dtype is float64, float32 variable will be generated + # and converted to float64 later using "cast". + warnings.warn( + "data type of argument only support float32 and float64, your argument will be convert to float32." + ) arg_np = arg_np.astype('float32') + # tmp is used to support broadcast, it summarizes shapes of all the args and get the mixed shape. 
tmp = tmp + arg_np numpy_args.append(arg_np) @@ -129,6 +132,36 @@ class Distribution(object): return tuple(variable_args) + def _check_values_dtype_in_probs(self, param, value): + """ + Log_prob and probs methods have input ``value``, if value's dtype is different from param, + convert value's dtype to be consistent with param's dtype. + + Args: + param (int|float|list|numpy.ndarray|Tensor): low and high in Uniform class, loc and scale in Normal class. + value (Tensor): The input tensor. + + Returns: + value (Tensor): Change value's dtype if value's dtype is different from param. + """ + if in_dygraph_mode(): + if value.dtype != param.dtype and convert_dtype( + value.dtype) in ['float32', 'float64']: + warnings.warn( + "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted." + ) + return core.ops.cast(value, 'in_dtype', value.dtype, + 'out_dtype', param.dtype) + + check_variable_and_dtype(value, 'value', ['float32', 'float64'], + 'log_prob') + if value.dtype != param.dtype: + warnings.warn( + "dtype of input 'value' needs to be the same as parameters of distribution class. dtype of 'value' will be converted." + ) + return tensor.cast(value, dtype=param.dtype) + return value + class Uniform(Distribution): """Uniform distribution with `low` and `high` parameters. @@ -155,8 +188,8 @@ class Uniform(Distribution): [broadcasting](https://www.paddlepaddle.org.cn/documentation/docs/en/develop/beginners_guide/basic_concept/broadcasting_en.html) (e.g., `high - low` is a valid operation). Args: - low(int|float|list|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float32, list, numpy.ndarray or Tensor - high(int|float|list|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float32, list, numpy.ndarray or Tensor + low(int|float|list|numpy.ndarray|Tensor): The lower boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor + high(int|float|list|numpy.ndarray|Tensor): The higher boundary of uniform distribution.The data type is int, float, list, numpy.ndarray or Tensor name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. Examples: @@ -206,6 +239,7 @@ class Uniform(Distribution): self.all_arg_is_float = False self.batch_size_unknown = False self.name = name if name is not None else 'Uniform' + self.dtype = 'float32' if isinstance(low, int): low = float(low) @@ -216,10 +250,22 @@ class Uniform(Distribution): self.batch_size_unknown = True self.low = low self.high = high + self.dtype = convert_dtype(low.dtype) else: if isinstance(low, float) and isinstance(high, float): self.all_arg_is_float = True + if isinstance( + low, + np.ndarray) and str(low.dtype) in ['float32', 'float64']: + self.dtype = low.dtype + elif isinstance( + high, + np.ndarray) and str(high.dtype) in ['float32', 'float64']: + self.dtype = high.dtype self.low, self.high = self._to_tensor(low, high) + if self.dtype != convert_dtype(self.low.dtype): + self.low = tensor.cast(self.low, dtype=self.dtype) + self.high = tensor.cast(self.high, dtype=self.dtype) def sample(self, shape, seed=0): """Generate samples of the specified shape. @@ -241,11 +287,11 @@ class Uniform(Distribution): if self.batch_size_unknown: output_shape = shape + batch_shape zero_tmp = tensor.fill_constant_batch_size_like( - self.low + self.high, batch_shape + shape, self.low.dtype, 0.) 
+ self.low + self.high, batch_shape + shape, self.dtype, 0.) uniform_random_tmp = nn.uniform_random_batch_size_like( zero_tmp, zero_tmp.shape, - dtype=convert_dtype(zero_tmp.dtype), + dtype=self.dtype, min=0., max=1., seed=seed) @@ -259,9 +305,8 @@ class Uniform(Distribution): else: output_shape = shape + batch_shape output = nn.uniform_random( - output_shape, seed=seed) * (tensor.zeros( - output_shape, dtype=self.low.dtype) + - (self.high - self.low)) + output_shape, seed=seed, dtype=self.dtype) * (tensor.zeros( + output_shape, dtype=self.dtype) + (self.high - self.low)) output = elementwise_add(output, self.low, name=name) if self.all_arg_is_float: return nn.reshape(output, shape, name=name) @@ -279,22 +324,20 @@ class Uniform(Distribution): """ name = self.name + '_log_prob' + value = self._check_values_dtype_in_probs(self.low, value) if in_dygraph_mode(): + # ensure value in [low, high] lb_bool = self.low < value ub_bool = value < self.high - dtype = value.dtype lb = core.ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype', - dtype) + value.dtype) ub = core.ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype', - dtype) + value.dtype) return nn.log(lb * ub) - nn.log(self.high - self.low) - check_variable_and_dtype(value, 'value', ['float32', 'float64'], - 'log_prob') - - lb_bool = control_flow.less_than(self.low, value) - ub_bool = control_flow.less_than(value, self.high) + lb_bool = self.low < value + ub_bool = value < self.high lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) return elementwise_sub( @@ -311,22 +354,19 @@ class Uniform(Distribution): """ name = self.name + '_probs' + value = self._check_values_dtype_in_probs(self.low, value) if in_dygraph_mode(): lb_bool = self.low < value ub_bool = value < self.high - dtype = value.dtype lb = core.ops.cast(lb_bool, 'in_dtype', lb_bool.dtype, 'out_dtype', - dtype) + value.dtype) ub = core.ops.cast(ub_bool, 'in_dtype', ub_bool.dtype, 'out_dtype', - dtype) + value.dtype) return (lb * ub) / (self.high - self.low) - check_variable_and_dtype(value, 'value', ['float32', 'float64'], - 'log_prob') - - lb_bool = control_flow.less_than(self.low, value) - ub_bool = control_flow.less_than(value, self.high) + lb_bool = self.low < value + ub_bool = value < self.high lb = tensor.cast(lb_bool, dtype=value.dtype) ub = tensor.cast(ub_bool, dtype=value.dtype) return elementwise_div((lb * ub), (self.high - self.low), name=name) @@ -334,6 +374,12 @@ class Uniform(Distribution): def entropy(self): """Shannon entropy in nats. + The entropy is + + .. math:: + + entropy(low, high) = \\log (high - low) + Returns: Tensor: Shannon entropy of uniform distribution.The data type is float32. @@ -364,8 +410,8 @@ class Normal(Distribution): * :math:`Z`: is the normalization constant. Args: - loc(int|float|list|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float32, list, numpy.ndarray or Tensor. - scale(int|float|list|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float32, list, numpy.ndarray or Tensor. + loc(int|float|list|numpy.ndarray|Tensor): The mean of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. + scale(int|float|list|numpy.ndarray|Tensor): The std of normal distribution.The data type is int, float, list, numpy.ndarray or Tensor. name(str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. 
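A sketch of the new dtype handling in this file: float64 numpy parameters now keep float64 instead of always falling back to float32, and `log_prob`/`probs` cast `value` to the parameter dtype via `_check_values_dtype_in_probs`. Assumes a Paddle build with the dygraph API shown in the surrounding docstrings:

.. code-block:: python

    import numpy as np
    import paddle
    from paddle.distribution import Uniform

    paddle.disable_static()

    low = np.array([1.0, 2.0], dtype='float64')
    high = np.array([3.0, 4.0], dtype='float64')
    uniform = Uniform(low, high)

    samples = uniform.sample([5])                     # float64 samples
    value = paddle.to_tensor([2.5, 3.5], dtype='float32')
    lp = uniform.log_prob(value)                      # value is cast to float64 first, with a warning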
Examples: @@ -418,6 +464,7 @@ class Normal(Distribution): self.batch_size_unknown = False self.all_arg_is_float = False self.name = name if name is not None else 'Normal' + self.dtype = 'float32' if isinstance(loc, int): loc = float(loc) @@ -428,10 +475,22 @@ class Normal(Distribution): self.batch_size_unknown = True self.loc = loc self.scale = scale + self.dtype = convert_dtype(loc.dtype) else: if isinstance(loc, float) and isinstance(scale, float): self.all_arg_is_float = True + if isinstance( + loc, + np.ndarray) and str(loc.dtype) in ['float32', 'float64']: + self.dtype = loc.dtype + elif isinstance( + scale, + np.ndarray) and str(scale.dtype) in ['float32', 'float64']: + self.dtype = scale.dtype self.loc, self.scale = self._to_tensor(loc, scale) + if self.dtype != convert_dtype(self.loc.dtype): + self.loc = tensor.cast(self.loc, dtype=self.dtype) + self.scale = tensor.cast(self.scale, dtype=self.dtype) def sample(self, shape, seed=0): """Generate samples of the specified shape. @@ -454,22 +513,18 @@ class Normal(Distribution): if self.batch_size_unknown: output_shape = shape + batch_shape zero_tmp = tensor.fill_constant_batch_size_like( - self.loc + self.scale, batch_shape + shape, self.loc.dtype, 0.) + self.loc + self.scale, batch_shape + shape, self.dtype, 0.) zero_tmp_reshape = nn.reshape(zero_tmp, output_shape) zero_tmp_shape = nn.shape(zero_tmp_reshape) normal_random_tmp = nn.gaussian_random( - zero_tmp_shape, - mean=0., - std=1., - seed=seed, - dtype=convert_dtype(self.loc.dtype)) + zero_tmp_shape, mean=0., std=1., seed=seed, dtype=self.dtype) output = normal_random_tmp * (zero_tmp_reshape + self.scale) output = elementwise_add(output, self.loc, name=name) return output else: output_shape = shape + batch_shape - output = nn.gaussian_random(output_shape, mean=0., std=1., seed=seed) * \ - (tensor.zeros(output_shape, dtype=self.loc.dtype) + self.scale) + output = nn.gaussian_random(output_shape, mean=0., std=1., seed=seed, dtype=self.dtype) * \ + (tensor.zeros(output_shape, dtype=self.dtype) + self.scale) output = elementwise_add(output, self.loc, name=name) if self.all_arg_is_float: return nn.reshape(output, shape, name=name) @@ -479,6 +534,16 @@ class Normal(Distribution): def entropy(self): """Shannon entropy in nats. + The entropy is + + .. math:: + + entropy(\sigma) = 0.5 \\log (2 \pi e \sigma^2) + + In the above equation: + + * :math:`scale = \sigma`: is the std. + Returns: Tensor: Shannon entropy of normal distribution.The data type is float32. @@ -486,7 +551,7 @@ class Normal(Distribution): name = self.name + '_entropy' batch_shape = list((self.loc + self.scale).shape) zero_tmp = tensor.fill_constant_batch_size_like( - self.loc + self.scale, batch_shape, self.loc.dtype, 0.) + self.loc + self.scale, batch_shape, self.dtype, 0.) return elementwise_add( 0.5 + zero_tmp, 0.5 * math.log(2 * math.pi) + nn.log((self.scale + zero_tmp)), @@ -502,11 +567,9 @@ class Normal(Distribution): Tensor: log probability.The data type is same with value. """ - if not in_dygraph_mode(): - check_variable_and_dtype(value, 'value', ['float32', 'float64'], - 'log_prob') - name = self.name + '_log_prob' + value = self._check_values_dtype_in_probs(self.loc, value) + var = self.scale * self.scale log_scale = nn.log(self.scale) return elementwise_sub( @@ -524,11 +587,9 @@ class Normal(Distribution): Tensor: probability.The data type is same with value. 
""" - if not in_dygraph_mode(): - check_variable_and_dtype(value, 'value', ['float32', 'float64'], - 'log_prob') - name = self.name + '_probs' + value = self._check_values_dtype_in_probs(self.loc, value) + var = self.scale * self.scale return elementwise_div( ops.exp(-1. * ((value - self.loc) * (value - self.loc)) / @@ -538,6 +599,29 @@ class Normal(Distribution): def kl_divergence(self, other): """The KL-divergence between two normal distributions. + The probability density function (pdf) is + + .. math:: + + KL\_divergence(\mu_0, \sigma_0; \mu_1, \sigma_1) = 0.5 (ratio^2 + (\\frac{diff}{\sigma_1})^2 - 1 - 2 \\ln {ratio}) + + .. math:: + + ratio = \\frac{\sigma_0}{\sigma_1} + + .. math:: + + diff = \mu_1 - \mu_0 + + In the above equation: + + * :math:`loc = \mu_0`: is the mean of current Normal distribution. + * :math:`scale = \sigma_0`: is the std of current Normal distribution. + * :math:`loc = \mu_1`: is the mean of other Normal distribution. + * :math:`scale = \sigma_1`: is the std of other Normal distribution. + * :math:`ratio`: is the ratio of scales. + * :math:`diff`: is the difference between means. + Args: other (Normal): instance of Normal. diff --git a/python/paddle/fluid/clip.py b/python/paddle/fluid/clip.py index 5f6594a47213021c3a82dd4a0266f52240270e87..7b301ac19d1d3dc1f4aabb6cf3af2f0874faa677 100644 --- a/python/paddle/fluid/clip.py +++ b/python/paddle/fluid/clip.py @@ -129,7 +129,7 @@ class GradientClipBase(object): def __str__(self): raise NotImplementedError() - @imperative_base.no_grad() + @imperative_base.no_grad def _dygraph_clip(self, params_grads): raise NotImplementedError @@ -258,7 +258,7 @@ class GradientClipByValue(GradientClipBase): def __str__(self): return "Gradient Clip By Value, min = %f, max=%f" % (self.min, self.max) - @imperative_base.no_grad() + @imperative_base.no_grad def _dygraph_clip(self, params_grads): params_and_grads = [] for p, g in params_grads: @@ -413,7 +413,7 @@ class GradientClipByNorm(GradientClipBase): def __str__(self): return "Gradient Clip By Norm, clip_norm=%f" % self.clip_norm - @imperative_base.no_grad() + @imperative_base.no_grad def _dygraph_clip(self, params_grads): params_and_grads = [] for p, g in params_grads: @@ -565,7 +565,7 @@ class GradientClipByGlobalNorm(GradientClipBase): def __str__(self): return "Gradient Clip By GlobalNorm, global_norm=%f" % (self.clip_norm) - @imperative_base.no_grad() + @imperative_base.no_grad def _dygraph_clip(self, params_grads): params_and_grads = [] sum_square_list = [] diff --git a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py index 93013ef8bf8442311621202e0a86dd65e7c38b30..328dafe6219adb3c6355de0bafc430c52725024f 100644 --- a/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py +++ b/python/paddle/fluid/contrib/mixed_precision/fp16_utils.py @@ -74,7 +74,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): continue for in_var_name in op.input(in_name): in_var = block.var(in_var_name) - if in_var.type not in valid_types: + if in_var.type not in valid_types or in_var.dtype == dest_dtype: continue if in_var.dtype == src_dtype: cast_name = in_var.name + '.cast_' + _dtype_to_str(dest_dtype) @@ -84,7 +84,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): name=cast_name, dtype=dest_dtype, persistable=False, - stop_gradient=False) + stop_gradient=in_var.stop_gradient) block._insert_op( idx, @@ -100,7 +100,7 @@ def _insert_cast_op(block, op, idx, src_dtype, dest_dtype): else: if 
op.has_attr('in_dtype'): op._set_attr('in_dtype', dest_dtype) - if src_dtype == core.VarDesc.VarType.FP32: + if src_dtype == core.VarDesc.VarType.FP32 and dest_dtype == core.VarDesc.VarType.FP16: for out_name in op.output_names: if op.type == 'batch_norm' and out_name != 'Y': continue diff --git a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt index 007d701284dfc7ff2cafb128984414517579fce3..6ac005060e0b21d88f17619bbe88b7a56c23fdb8 100644 --- a/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt +++ b/python/paddle/fluid/contrib/slim/tests/CMakeLists.txt @@ -270,7 +270,7 @@ foreach(src ${TEST_OPS}) endforeach() # setting timeout value for old unittests -if(NOT WIN32) +if(NOT WIN32 AND NOT APPLE) set_tests_properties(test_post_training_quantization_mobilenetv1 PROPERTIES TIMEOUT 250 LABELS "RUN_TYPE=NIGHTLY") - set_tests_properties(test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 200 LABELS "RUN_TYPE=NIGHTLY") + set_tests_properties(test_post_training_quantization_resnet50 PROPERTIES TIMEOUT 200 LABELS "RUN_TYPE=NIGHTLY") endif() diff --git a/python/paddle/fluid/data_feeder.py b/python/paddle/fluid/data_feeder.py index 45aa85d4168a55e206460ce2e39292013caa9ce0..5da83da33b8de334d4ae1e5b072cfb20d74c1271 100644 --- a/python/paddle/fluid/data_feeder.py +++ b/python/paddle/fluid/data_feeder.py @@ -132,6 +132,28 @@ def check_dtype(input_dtype, extra_message)) +def check_shape(shape, + op_name, + expected_shape_type=(list, tuple, Variable), + expected_element_type=(int, Variable), + expected_tensor_dtype=('int32', 'int64')): + # See NOTE [ Why skip dynamic graph check ] + if in_dygraph_mode(): + return + check_type(shape, 'shape', expected_shape_type, op_name) + if expected_element_type is not None and not isinstance(shape, Variable): + for item in shape: + check_type(item, 'element of shape', expected_element_type, op_name) + if expected_tensor_dtype is not None and isinstance(item, Variable): + check_dtype( + item.dtype, 'element of shape', expected_tensor_dtype, + op_name, + 'If element of shape is Tensor, its data type should be {}'. + format(', '.join(expected_tensor_dtype))) + if expected_tensor_dtype is not None and isinstance(shape, Variable): + check_dtype(shape.dtype, 'shape', expected_tensor_dtype, op_name) + + class DataToLoDTensorConverter(object): def __init__(self, place, lod_level, shape, dtype): self.place = place diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py index 2174dbd31b8fb1ae97894699e03e25e809085cc8..c548bdfeba19510b26c0f80d356fa6a6b7bbaed7 100644 --- a/python/paddle/fluid/dygraph/base.py +++ b/python/paddle/fluid/dygraph/base.py @@ -12,9 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. 
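The clip.py hunks above drop the parentheses from `@imperative_base.no_grad()`, which works because the reworked `no_grad` in the dygraph/base.py hunk below accepts either a function (bare decorator) or nothing (context manager). A minimal pure-Python sketch of that dual-use pattern, not Paddle's actual implementation:

.. code-block:: python

    import contextlib
    import functools

    _grad_enabled = [True]  # stand-in for the tracer's train mode

    @contextlib.contextmanager
    def _switch_grad_off():
        old = _grad_enabled[0]
        _grad_enabled[0] = False
        try:
            yield
        finally:
            _grad_enabled[0] = old

    def no_grad(func=None):
        if func is None:
            # used as `with no_grad():`
            return _switch_grad_off()

        # used as a bare `@no_grad` decorator
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with _switch_grad_off():
                return func(*args, **kwargs)

        return wrapper

    @no_grad
    def forward():
        return _grad_enabled[0]

    print(forward())              # False
    with no_grad():
        print(_grad_enabled[0])   # False
    print(_grad_enabled[0])       # True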
from ..wrapped_decorator import signature_safe_contextmanager, wrap_decorator -import inspect import decorator import contextlib +import functools +import inspect import sys import numpy as np from paddle.fluid import core @@ -26,8 +27,8 @@ import objgraph from ..data_feeder import convert_dtype __all__ = [ - 'no_grad', 'grad', 'guard', 'enable_dygraph', 'disable_dygraph', 'enabled', - 'to_variable' + 'no_grad', 'no_grad_', 'grad', 'guard', 'enable_dygraph', 'disable_dygraph', + 'enabled', 'to_variable' ] @@ -167,7 +168,80 @@ def disable_dygraph(): _functional_dygraph_context_manager = None -class no_grad: +@signature_safe_contextmanager +def _switch_tracer_mode_guard_(is_train=True): + tracer = framework._dygraph_tracer() + if tracer: + mode = tracer._train_mode + tracer._train_mode = is_train + try: + yield + finally: + tracer._train_mode = mode + else: + yield + + +def no_grad(func=None): + """ + :api_attr: imperative + + Create a context which disables dygraph gradient calculation. + In this mode, the result of every computation will have `stop_gradient=True`. + + Also functions as a decorator. (Make sure to instantiate without parenthesis.) + + Examples: + + .. code-block:: python + + import numpy as np + import paddle.fluid as fluid + + # use as generator + + data = np.array([[2, 3], [4, 5]]).astype('float32') + with fluid.dygraph.guard(): + l0 = fluid.Linear(2, 2) # l0.weight.gradient() is None + l1 = fluid.Linear(2, 2) + with fluid.dygraph.no_grad(): + # l1.weight.stop_gradient is False + tmp = l1.weight * 2 # tmp.stop_gradient is True + x = fluid.dygraph.to_variable(data) + y = l0(x) + tmp + o = l1(y) + o.backward() + print(tmp.gradient() is None) # True + print(l0.weight.gradient() is None) # False + + # use as decorator + + @fluid.dygraph.no_grad + def test_layer(): + with fluid.dygraph.guard(): + inp = np.ones([3, 1024], dtype='float32') + t = fluid.dygraph.base.to_variable(inp) + linear1 = fluid.Linear(1024, 4, bias_attr=False) + linear2 = fluid.Linear(4, 4) + ret = linear1(t) + dy_ret = linear2(ret) + + test_layer() + + """ + if func is None: + return _switch_tracer_mode_guard_(is_train=False) + else: + + @decorator.decorator + def __impl__(func, *args, **kwargs): + with _switch_tracer_mode_guard_(is_train=False): + return func(*args, **kwargs) + + return __impl__(func) + + +class no_grad_: """ :api_attr: imperative diff --git a/python/paddle/fluid/dygraph/checkpoint.py b/python/paddle/fluid/dygraph/checkpoint.py index 82018132cc8b8600958e5cd52df5844e3d37638e..f85b184f68111bbc0930b36e2ba6e05c2dbd006a 100644 --- a/python/paddle/fluid/dygraph/checkpoint.py +++ b/python/paddle/fluid/dygraph/checkpoint.py @@ -16,13 +16,16 @@ from __future__ import print_function import os import collections +import functools from ..framework import Variable, default_main_program, in_dygraph_mode, dygraph_only, Parameter, ParamBase, _varbase_creator, _dygraph_tracer import pickle import six from . import learning_rate_scheduler import warnings from .. 
import core -from paddle.fluid.dygraph.io import VARIABLE_FILENAME, EXTRA_VAR_INFO_FILENAME, _load_persistable_vars +from .base import guard +from paddle.fluid.dygraph.jit import SaveLoadConfig +from paddle.fluid.dygraph.io import _construct_program_holders, _construct_params_and_buffers __all__ = [ 'save_dygraph', @@ -30,6 +33,37 @@ __all__ = [ ] +# NOTE(chenweihang): deprecate load_dygraph's argument keep_name_table, +# ensure compatibility when user still use keep_name_table argument +def deprecate_keep_name_table(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + def __warn_and_build_configs__(keep_name_table): + warnings.warn( + "The argument `keep_name_table` has deprecated, please use `SaveLoadConfig.keep_name_table`.", + DeprecationWarning) + configs = SaveLoadConfig() + configs.keep_name_table = keep_name_table + return configs + + # deal with arg `keep_name_table` + if len(args) > 1 and isinstance(args[1], bool): + args = list(args) + args[1] = __warn_and_build_configs__(args[1]) + # deal with kwargs + elif 'keep_name_table' in kwargs: + kwargs['configs'] = __warn_and_build_configs__(kwargs[ + 'keep_name_table']) + kwargs.pop('keep_name_table') + else: + # do nothing + pass + + return func(*args, **kwargs) + + return wrapper + + @dygraph_only def save_dygraph(state_dict, model_path): ''' @@ -100,17 +134,27 @@ def save_dygraph(state_dict, model_path): # TODO(qingqing01): remove dygraph_only to support loading static model. # maybe need to unify the loading interface after 2.0 API is ready. -#@dygraph_only -def load_dygraph(model_path, keep_name_table=False): +# @dygraph_only +@deprecate_keep_name_table +def load_dygraph(model_path, configs=None): ''' :api_attr: imperative - Load parameter state_dict from disk. + Load parameter state dict from disk. + + .. note:: + Due to some historical reasons, if you load ``state_dict`` from the saved + result of `paddle.io.save_inference_model`, the structured variable name + will cannot be restored. You need to set the argument `use_structured_name=False` + when using `Layer.set_state_dict` later. Args: - model_path(str) : The file prefix store the state_dict. (The path should Not contain suffix '.pdparams') - keep_name_table(bool, optional) : Whether keep structed name to parameter name conversion table in output dict. - Default : False + model_path(str) : The file prefix store the state_dict. + (The path should Not contain suffix '.pdparams') + configs (SaveLoadConfig, optional): :ref:`api_imperative_jit_saveLoadConfig` + object that specifies additional configuration options, these options + are for compatibility with ``jit.save/io.save_inference_model`` formats. + Default None. Returns: state_dict(dict) : the dict store the state_dict @@ -118,23 +162,27 @@ def load_dygraph(model_path, keep_name_table=False): Examples: .. 
code-block:: python - import paddle.fluid as fluid + import paddle - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + paddle.disable_static() - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") + emb = paddle.nn.Embedding([10, 10]) - adam = fluid.optimizer.Adam( learning_rate = fluid.layers.noam_decay( 100, 10000), - parameter_list = emb.parameters() ) - state_dict = adam.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") + scheduler = paddle.optimizer.lr_scheduler.NoamLR( + d_model=0.01, warmup_steps=100, verbose=True) + adam = paddle.optimizer.Adam( + learning_rate=scheduler, + parameters=emb.parameters()) + state_dict = adam.state_dict() + paddle.save(state_dict, "paddle_dy") - ''' + para_state_dict, opti_state_dict = paddle.load("paddle_dy") + ''' + # deal with argument `model_path` model_prefix = model_path if model_prefix.endswith(".pdparams"): model_prefix = model_prefix[:-9] @@ -145,74 +193,53 @@ def load_dygraph(model_path, keep_name_table=False): opti_dict = None params_file_path = model_prefix + ".pdparams" opti_file_path = model_prefix + ".pdopt" + + # deal with argument `configs` + if configs is None: + configs = SaveLoadConfig() + if not os.path.exists(params_file_path) and not os.path.exists( opti_file_path): - # Load state dict by `jit.save` save format - # TODO(chenweihang): [Why not support `io.save_infernece_model` save format here] + # Load state dict by `jit.save/io.save_inference_model` save format + # NOTE(chenweihang): [ Compatibility of save_inference_model save format ] # The model saved by `save_inference_model` does not completely correspond to # the information required by the `state_dict` under the dygraph. - # Although we reluctantly restore the `state_dict` in some scenarios, - # this may not be complete and there are some limitations, so this function - # will be considered later. The limitations include: - # 1. `save_inference_model` not save structured name, we need to remind - # the user to configure the `use_structured_name` argument when `set_dict`, - # but this argument is currently not public - # 2. if `save_inference_model` save all persistable variables in a single file, - # user need to give the variable name list to load `state_dict` + # `save_inference_model` not save structured name, we need to remind + # the user to configure the `use_structured_name` argument when `set_state_dict` + # NOTE(chenweihang): `jit.save` doesn't save optimizer state # 1. check model path if not os.path.isdir(model_prefix): raise ValueError("Model saved directory '%s' is not exists." % model_prefix) - # 2. load `__variables.info__` - var_info_path = os.path.join(model_prefix, EXTRA_VAR_INFO_FILENAME) - if not os.path.exists(var_info_path): - raise RuntimeError( - "No target can be loaded. Now only supports loading `state_dict` from " - "the result saved by `imperative.save` and `imperative.jit.save`." - ) - with open(var_info_path, 'rb') as f: - extra_var_info = pickle.load(f) - # 3. 
load `__variables__` - # TODO(chenweihang): now only supports loading from default save format: - # - all persistable vars saved in one file named `__variables__` - # for other case, we may need to modify the arguments of this API - var_file_path = os.path.join(model_prefix, VARIABLE_FILENAME) - if not os.path.exists(var_file_path): - raise RuntimeError( - "The parameter file to be loaded was not found. " - "Now only supports loading from the default save format, " - "and does not support custom params_filename and " - "save parameters separately.") - # 4. load all persistable vars - load_var_list = [] - for name in sorted(extra_var_info): - var = _varbase_creator(name=name, persistable=True) - load_var_list.append(var) - _dygraph_tracer().trace_op( - type='load_combine', - inputs={}, - outputs={'Out': load_var_list}, - attrs={'file_path': var_file_path}) - # 5. construct state_dict - para_dict = dict() - for var in load_var_list: - structured_name = extra_var_info[var.name].get('structured_name', - None) - if structured_name is None: - raise RuntimeError( - "Cannot find saved variable (%s)'s structured name in saved model.", - var.name) - para_dict[structured_name] = var.numpy() - # NOTE: `jit.save` doesn't save optimizer state + + # 2. load program desc & construct _ProgramHolder + programs = _construct_program_holders(model_path, + configs.model_filename) + + # 3. load layer parameters & buffers + # NOTE: using fluid.dygraph.guard() here will cause import error in py2 + with guard(): + persistable_var_dict = _construct_params_and_buffers( + model_prefix, + programs, + configs.separate_params, + configs.params_filename, + append_suffix=False) + + # 4. construct state_dict + para_dict = dict() + for var_name in persistable_var_dict: + para_dict[var_name] = persistable_var_dict[var_name].numpy() else: # Load state dict by `save_dygraph` save format + para_dict = {} if os.path.exists(params_file_path): with open(params_file_path, 'rb') as f: para_dict = pickle.load(f) if six.PY2 else pickle.load( f, encoding='latin1') - if not keep_name_table and "StructuredToParameterName@@" in para_dict: + if not configs.keep_name_table and "StructuredToParameterName@@" in para_dict: del para_dict["StructuredToParameterName@@"] if os.path.exists(opti_file_path): diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py b/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py index 82f39ffd080ec803beca4e60695204b707f48210..9334c15f7bcbc0ca3782be1d4f7fc6826a59bdbc 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/basic_api_transformer.py @@ -16,9 +16,7 @@ import astor import gast from paddle.fluid.dygraph.dygraph_to_static.static_analysis import AstNodeWrapper -from paddle.fluid.dygraph.dygraph_to_static.utils import is_dygraph_api, is_to_variable -from paddle.fluid.dygraph.dygraph_to_static.utils import to_assign_node, to_static_ast, update_args_of_func -from paddle.fluid.dygraph.dygraph_to_static.utils import dygraph_class_to_static_api +from paddle.fluid.dygraph.dygraph_to_static import utils class BasicApiTransformer(gast.NodeTransformer): @@ -56,7 +54,7 @@ class BasicApiTransformer(gast.NodeTransformer): if isinstance(child_node, gast.Call): # TODO(liym27): # Considers that a dygraph api which modifies the input or has a output. 
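(A minimal sketch of the compatibility path described in the `load_dygraph` note above: when the state dict comes from a `jit.save` / `save_inference_model` directory, structured names are not recorded, so parameters must be matched by name. The layer and the model directory below are hypothetical.)

.. code-block:: python

    import paddle

    paddle.disable_static()
    net = paddle.nn.Linear(784, 10)                  # hypothetical layer
    # "linear.infer.model" stands for a directory saved by paddle.jit.save
    para_state_dict, _ = paddle.load("linear.infer.model")
    # this format keeps no structured names, so match by parameter name
    net.set_state_dict(para_state_dict, use_structured_name=False)
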
- if is_dygraph_api(child_node): + if utils.is_dygraph_api(child_node): return else: self._visit_Call(child_node) @@ -73,7 +71,7 @@ class BasicApiTransformer(gast.NodeTransformer): if self._is_dygraph_forward(func_name): class_node = self._get_class_node(func_name) - static_node = to_static_ast(node, class_node) + static_node = utils.to_static_ast(node, class_node) return static_node else: return node @@ -91,14 +89,51 @@ class BasicApiTransformer(gast.NodeTransformer): if is_to_variable(node_value): return False - if is_dygraph_api(node_value): + if utils.is_dygraph_api(node_value): dygraph_api = node_value.func.attr - if not dygraph_class_to_static_api.get(dygraph_api): + if not utils.dygraph_class_to_static_api.get(dygraph_api): return False - update_args_of_func(node_value, node_value, "__init__") + utils.update_args_of_func(node_value, node_value, "__init__") target_str = astor.to_source(gast.gast_to_ast(node.targets[0])) self.class_node_dict[target_str] = node_value return True # TODO: node.value is not dygraph class return False + + +def is_to_variable(node): + assert isinstance(node, gast.Call) + api_name = utils.ast_to_source_code(node.func).strip() + + if utils.is_dygraph_api(node): + return api_name.endswith("to_variable") + + if utils.is_paddle_api(node): + return api_name.endswith("to_tensor") + + return False + + +def to_assign_node(node): + # Transform dygraph api `fluid.dygraph.to_variable` alias `paddle.to_tensor` to static api `fluid.layers.assign`. + # NOTE: + # 1. Api `to_variable` supports data type {float16, float32, float64, int16, int32, int64, uint8, uint16}, + # but api `assign` only supports {float32, float64, int32, int64, bool}; + # 2. If the input of api `assign` is numpy.ndarray, its size cannot be greater than 1024 * 1024. + + assert isinstance(node, gast.Call) + assign_api = gast.parse('fluid.layers.assign').body[0].value + node.func = assign_api + + if node.args: + node.args = [node.args[0]] + node.keywords = [] + else: + for idx, kw in enumerate(node.keywords): + if kw.arg == 'value' or kw.arg == 'data': + node.keywords[idx].arg = 'input' + node.keywords = [node.keywords[idx]] + node.args = [] + break + return node diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py index 75cb65085846d672d2488c98bf6ad625ac12e78b..c52872b15016169504359b54ad5a40360e244ce0 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/logging_utils.py @@ -98,8 +98,15 @@ class TranslatorLogger(object): return level == self.transformed_code_level def has_verbosity(self, level): + """ + Checks whether the verbosity level set by the user is greater than or equal to the log level. + Args: + level(int): The level of log. + Returns: + True if the verbosity level set by the user is greater than or equal to the log level, otherwise False. 
+ """ level = self.check_level(level) - return level >= self.verbosity_level + return self.verbosity_level >= level def error(self, msg, *args, **kwargs): self.logger.error(msg, *args, **kwargs) diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py index ad7d6dfd3f96a710015456453de57dc0eb58f94d..cb489af44d0adc7da377f73a3205c3c264769b4d 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/program_translator.py @@ -24,6 +24,7 @@ import warnings import gast from paddle.fluid import framework +from paddle.fluid import in_dygraph_mode from paddle.fluid.dygraph import layers from paddle.fluid.data_feeder import check_type from paddle.fluid.layers.utils import flatten @@ -32,6 +33,7 @@ from paddle.fluid.dygraph.base import switch_to_static_graph from paddle.fluid.dygraph.dygraph_to_static import DygraphToStaticAst from paddle.fluid.dygraph.dygraph_to_static.error import ERROR_DATA from paddle.fluid.dygraph.dygraph_to_static.error import attach_error_data +from paddle.fluid.dygraph.dygraph_to_static import logging_utils from paddle.fluid.dygraph.dygraph_to_static.origin_info import attach_origin_info from paddle.fluid.dygraph.dygraph_to_static.origin_info import create_and_update_origin_info_map from paddle.fluid.dygraph.dygraph_to_static.origin_info import update_op_callstack_with_origin_info @@ -283,13 +285,21 @@ class StaticLayer(object): Return: Outputs of decorated function. """ + # 1. call dygraph function directly if not enable `declarative` if not self._program_trans.enable_declarative: - warnings.warn( - "The decorator '@paddle.jit.to_static' doesn't work when setting ProgramTranslator.enable=False. " + logging_utils.warn( + "The decorator '@paddle.jit.to_static' does NOT work when setting ProgramTranslator.enable=False. " "We will just return dygraph output.") return self._call_dygraph_function(*args, **kwargs) + if not in_dygraph_mode() and self._program_trans.enable_declarative: + raise RuntimeError( + "Failed to run the callable object {} decorated by '@paddle.jit.to_static', " + "because it does NOT in dynamic mode. Please disable the static mode to enter dynamic mode with the " + "following API: paddle.disable_static().".format( + self.dygraph_function)) + # 2. trace ops from dygraph layers and cache the generated program. args, kwargs = self._function_spec.unified_args_and_kwargs(args, kwargs) try: diff --git a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py index f344ad2f7d7af00e6037b7552e258bf5c796a3b8..86593dc24aa8bda7906aab2001e8bd285f64288a 100644 --- a/python/paddle/fluid/dygraph/dygraph_to_static/utils.py +++ b/python/paddle/fluid/dygraph/dygraph_to_static/utils.py @@ -136,9 +136,12 @@ def is_api_in_module(node, module_prefix): # import_str = "".join(import_statements) import paddle import paddle.fluid as fluid + import paddle.fluid.dygraph as dygraph import paddle.fluid.layers as layers + from paddle.fluid.dygraph import to_variable - import paddle.fluid.dygraph as dygraph + from paddle import to_tensor + return eval("_is_api_in_module_helper({}, '{}')".format(func_str, module_prefix)) except NameError: @@ -146,15 +149,18 @@ def is_api_in_module(node, module_prefix): def is_dygraph_api(node): + # Note: A api in module dygraph_to_static is not a real dygraph api. 
if is_api_in_module(node, "paddle.fluid.dygraph.dygraph_to_static"): return False + # TODO(liym27): A better way to determine whether it is a dygraph api. + # Consider the decorator @dygraph_only return is_api_in_module(node, "paddle.fluid.dygraph") def is_paddle_api(node): - return is_api_in_module(node, "paddle.fluid") + return is_api_in_module(node, "paddle") # Is numpy_api cannot reuse is_api_in_module because of numpy module problem @@ -233,14 +239,6 @@ def _add_keywords_to(node, dygraph_api_name): return -def is_to_variable(node): - assert isinstance(node, gast.Call) - if is_dygraph_api(node): - api_name = ast_to_source_code(node.func).strip() - return api_name.endswith("to_variable") - return False - - def to_static_ast(node, class_node): assert isinstance(node, gast.Call) assert isinstance(class_node, gast.Call) @@ -268,29 +266,6 @@ def to_static_ast(node, class_node): return node -def to_assign_node(node): - # Transform dygraph api `fluid.dygraph.to_variable` to static api `fluid.layers.assign`. - # NOTE: - # 1. Api `to_variable` supports data type {float16, float32, float64, int16, int32, int64, uint8, uint16}, - # but api `assign` only supports {float32, float64, int32, int64, bool}; - # 2. If the input of api `assign` is numpy.ndarray, its size cannot be greater than 1024 * 1024. - assert isinstance(node, gast.Call) - assign_api = gast.parse('fluid.layers.assign').body[0].value - node.func = assign_api - - if node.args: - node.args = [node.args[0]] - node.keywords = [] - else: - for idx, kw in enumerate(node.keywords): - if kw.arg == 'value': - node.keywords[idx].arg = 'input' - node.keywords = [node.keywords[idx]] - node.args = [] - break - return node - - def update_args_of_func(node, dygraph_node, method_name): assert isinstance(node, gast.Call) if method_name not in ["__init__", "forward"]: diff --git a/python/paddle/fluid/dygraph/io.py b/python/paddle/fluid/dygraph/io.py index 7f3d450a49c7d3fcc9ca1d3c2d7c5eb732671c6c..1d2ea142c7d5f2e653e446986a39d1bc155006f0 100644 --- a/python/paddle/fluid/dygraph/io.py +++ b/python/paddle/fluid/dygraph/io.py @@ -488,6 +488,15 @@ def _load_persistable_vars(model_path, return load_var_dict +# NOTE(chenweihang): to adapt paddle.load to get state_dict +def _remove_varname_suffix(var_dict, program_holder): + no_suffix_var_dict = dict() + for var_name in var_dict: + no_suffix_name = program_holder._suffix_varname_dict[var_name] + no_suffix_var_dict[no_suffix_name] = var_dict[var_name] + return no_suffix_var_dict + + def _construct_program_holders(model_path, model_filename=None): # make sure the path has been checked program_holder_dict = dict() @@ -517,7 +526,8 @@ def _construct_program_holders(model_path, model_filename=None): def _construct_params_and_buffers(model_path, programs, separate_params=False, - params_filename=None): + params_filename=None, + append_suffix=True): var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) if os.path.exists(var_info_path): var_dict = _load_persistable_vars(model_path, var_info_path, @@ -526,6 +536,10 @@ def _construct_params_and_buffers(model_path, else: var_dict = _load_persistable_vars_by_program( model_path, programs['forward'], params_filename) + + if not append_suffix: + var_dict = _remove_varname_suffix(var_dict, programs['forward']) + return var_dict @@ -542,89 +556,92 @@ class TranslatedLayer(layers.Layer): .. 
code-block:: python import numpy as np - import paddle.fluid as fluid - from paddle.fluid.dygraph import Linear - from paddle.fluid.dygraph import declarative + import paddle + import paddle.nn as nn + import paddle.optimizer as opt - BATCH_SIZE = 32 - BATCH_NUM = 20 + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 - def random_batch_reader(): - def _get_random_images_and_labels(image_shape, label_shape): - image = np.random.random(size=image_shape).astype('float32') - label = np.random.random(size=label_shape).astype('int64') - return image, label + IMAGE_SIZE = 784 + CLASS_NUM = 10 - def __reader__(): - for _ in range(BATCH_NUM): - batch_image, batch_label = _get_random_images_and_labels( - [BATCH_SIZE, 784], [BATCH_SIZE, 1]) - yield batch_image, batch_label + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples - return __reader__ + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples - class LinearNet(fluid.dygraph.Layer): - def __init__(self, in_size, out_size): + class LinearNet(nn.Layer): + def __init__(self): super(LinearNet, self).__init__() - self._linear = Linear(in_size, out_size) + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) - @declarative + @paddle.jit.to_static def forward(self, x): return self._linear(x) + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + # enable dygraph mode - fluid.enable_dygraph() + place = paddle.CPUPlace() + paddle.disable_static(place) # 1. train & save model. - # create network - net = LinearNet(784, 1) - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=net.parameters()) - # create data loader - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) - # train - for data in train_loader(): - img, label = data - label.stop_gradient = True - cost = net(img) + # create network + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) - avg_loss.backward() - adam.minimize(avg_loss) - net.clear_gradients() + # train + train(layer, loader, loss_fn, adam) + # save model_path = "linear.example.model" - fluid.dygraph.jit.save( - layer=net, - model_path=model_path, - input_spec=[img]) + paddle.jit.save(layer, model_path) # 2. 
load model as TranslatedLayer - translated_layer = fluid.dygraph.jit.load(model_path) + + # load + translated_layer = paddle.jit.load(model_path) + # inference translated_layer.eval() - x = fluid.dygraph.to_variable(np.random.random((1, 784)).astype('float32')) + x = paddle.randn([1, IMAGE_SIZE], 'float32') pred = translated_layer(x) + # fine-tune translated_layer.train() - adam = fluid.optimizer.AdamOptimizer(learning_rate=0.1, parameter_list=translated_layer.parameters()) - train_loader = fluid.io.DataLoader.from_generator(capacity=5) - train_loader.set_batch_generator(random_batch_reader()) - for data in train_loader(): - img, label = data - label.stop_gradient = True - - cost = translated_layer(img) + adam = opt.Adam(learning_rate=0.001, parameters=translated_layer.parameters()) + train(translated_layer, loader, loss_fn, adam) - loss = fluid.layers.cross_entropy(cost, label) - avg_loss = fluid.layers.mean(loss) - - avg_loss.backward() - adam.minimize(avg_loss) - translated_layer.clear_gradients() """ def __init__(self, programs, persistable_vars): @@ -685,7 +702,7 @@ class TranslatedLayer(layers.Layer): # 1. load program desc & construct _ProgramHolder programs = _construct_program_holders(model_path, model_filename) - # 2. load layer parameters & parameter attributes + # 2. load layer parameters & buffers persistable_vars = _construct_params_and_buffers( model_path, programs, separate_params, params_filename) @@ -800,3 +817,107 @@ class TranslatedLayer(layers.Layer): def eval(self): self._is_test = True + + def program(self, method_name='forward'): + """ + Gets translated program of specified method. + + Args: + - method_name (string): mehtod name corresponding to the program + to be obtained. Default: 'forward'. + + Returns: + Program + + Examples: + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn as nn + import paddle.optimizer as opt + + BATCH_SIZE = 16 + BATCH_NUM = 4 + EPOCH_NUM = 4 + + IMAGE_SIZE = 784 + CLASS_NUM = 10 + + # define a random dataset + class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) + + @paddle.jit.to_static + def forward(self, x): + return self._linear(x) + + def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format( + epoch_id, batch_id, np.mean(loss.numpy()))) + + # enable dygraph mode + place = paddle.CPUPlace() + paddle.disable_static(place) + + # create network + layer = LinearNet() + loss_fn = nn.CrossEntropyLoss() + adam = opt.Adam(learning_rate=0.001, parameters=layer.parameters()) + + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + loader = paddle.io.DataLoader(dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + + # train + train(layer, loader, loss_fn, adam) + + # save + model_path = "linear.example.model" + paddle.jit.save(layer, model_path) + + # load + translated_layer = paddle.jit.load(model_path) + + # get program + program = translated_layer.program() + """ + # 1. get program holder + program_holder = self._program_holder_dict.get(method_name, None) + if program_holder is None: + raise ValueError( + "The method `%s` is not exists in loaded TranslatedLayer." % + method_name) + + # 2. get inference program desc + program_desc = program_holder.infer_program + + # 3. construct program + program = _build_program_by_desc(program_desc) + return program diff --git a/python/paddle/fluid/dygraph/jit.py b/python/paddle/fluid/dygraph/jit.py index f67b79b91f7da235697d920cf0dfe376e88ab93e..9f4ec2b55bc6b56fc796d3124edf1ec0deb3f23e 100644 --- a/python/paddle/fluid/dygraph/jit.py +++ b/python/paddle/fluid/dygraph/jit.py @@ -293,6 +293,8 @@ class SaveLoadConfig(object): self._model_filename = None self._params_filename = None self._separate_params = False + # used for `paddle.load` + self._keep_name_table = False # NOTE: Users rarely use following configs, so these configs are not open to users, # reducing user learning costs, but we retain the configuration capabilities @@ -600,6 +602,54 @@ class SaveLoadConfig(object): % type(value)) self._separate_params = value + @property + def keep_name_table(self): + """ + Configures whether keep ``structured_name -> parameter_name`` dict in loaded state dict. + This dict is the debugging information saved when call `paddle.save`. + It is generally only used for debugging and does not affect the actual training or inference. + By default, it will not be retained in `paddle.load` result. Default: False. + + .. note:: + Only used for ``paddle.load``. + + Examples: + .. 
code-block:: python + + import paddle + + paddle.disable_static() + + linear = paddle.nn.Linear(5, 1) + + state_dict = linear.state_dict() + paddle.save(state_dict, "paddle_dy") + + configs = paddle.SaveLoadConfig() + configs.keep_name_table = True + para_state_dict, _ = paddle.load("paddle_dy", configs) + + print(para_state_dict) + # the name_table is 'StructuredToParameterName@@' + # {'bias': array([0.], dtype=float32), + # 'StructuredToParameterName@@': + # {'bias': u'linear_0.b_0', 'weight': u'linear_0.w_0'}, + # 'weight': array([[ 0.04230034], + # [-0.1222527 ], + # [ 0.7392676 ], + # [-0.8136974 ], + # [ 0.01211023]], dtype=float32)} + """ + return self._keep_name_table + + @keep_name_table.setter + def keep_name_table(self, value): + if not isinstance(value, bool): + raise TypeError( + "The SaveLoadConfig.keep_name_table should be bool value, but received input's type is %s." + % type(value)) + self._keep_name_table = value + @switch_to_static_graph def save(layer, model_path, input_spec=None, configs=None): diff --git a/python/paddle/fluid/dygraph/layers.py b/python/paddle/fluid/dygraph/layers.py index 1ef719b9da187be659d9c898ec996b5ad0c0d8a6..7075024369f328b59ecac014b0960fc26f447ff2 100644 --- a/python/paddle/fluid/dygraph/layers.py +++ b/python/paddle/fluid/dygraph/layers.py @@ -29,6 +29,9 @@ from .layer_object_helper import LayerObjectHelper from .base import program_desc_tracing_guard, param_guard from paddle.fluid import framework from ..param_attr import ParamAttr +from paddle.fluid.executor import Executor, global_scope +from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.framework import _current_expected_place as _get_device __all__ = ['Layer'] @@ -797,7 +800,7 @@ class Layer(core.Layer): raise ValueError( "super(YourLayer, self).__init__() should be called first") if len(self._loaddict_holder) > 0: - assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in stat_dict".format( + assert value.name in self._loaddict_holder, "Parameter not found, Can't not find [ {} ] in state_dict".format( value.name) value.set_value(self._loaddict_holder[value.name]) @@ -943,12 +946,13 @@ class Layer(core.Layer): destination = destination_temp return destination - def set_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): + @framework.deprecate_stat_dict + def set_state_dict(self, + state_dict, + include_sublayers=True, + use_structured_name=True): ''' - Set parameters and persistable buffers from stat_dict. All the parameters and buffers will be reset by the tensor in the stat_dict + Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters and persistable buffers. @@ -961,72 +965,67 @@ class Layer(core.Layer): Examples: .. 
code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding([10, 10]) - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") + + para_state_dict, _ = paddle.load("paddle_dy") - ''' - self.load_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) + emb.set_state_dict(para_state_dict) - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): ''' - Set parameters and persistable buffers from stat_dict. All the parameters and persistabl buffers will be reset by the tensor in the stat_dict - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters and persistable buffers. - include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key. - Default: True - Returns: - None - - Examples: - .. code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.load_dict( para_state_dict ) - - ''' - - inner_state_dict = self.state_dict() + def _check_match(key, param): + state = state_dict.get(key, None) + if state is None: + raise ValueError("{} is not found in the provided dict.".format( + key)) + if list(state.shape) != list(param.shape): + raise ValueError( + "{} receives a shape {}, but the expected shape is {}.". + format(key, list(state.shape), list(param.shape))) + return param, state + + matched_param_state = [] + for key, param in self.state_dict().items(): + key_name = key if use_structured_name else param.name + try: + match_res = _check_match(key_name, param) + matched_param_state.append(match_res) + except ValueError as err: + warnings.warn(("Skip loading for {}. ".format(key) + str(err))) + + if in_dygraph_mode(): + for param, state in matched_param_state: + param.set_value(state) + else: - for name, param_or_buffer in inner_state_dict.items(): - key_name = name if use_structured_name else param_or_buffer.name - if key_name in stat_dict: - param_or_buffer.set_value(stat_dict[key_name]) - else: - raise RuntimeError( - "Parameter or persistable buffer not found, Can't find [ {} ] in stat_dict" - "use_structured_name is set to [{}]".format( - key_name, use_structured_name)) - unused_para_list = [] - for k, v in stat_dict.items(): - if k not in inner_state_dict: - unused_para_list.append(k) - if len(unused_para_list) > 0: - warnings.warn( - "Variables [ {} ] are not used, because not included in layers state_dict". 
- format(" ".join(unused_para_list))) + def _set_var(var, ndarray): + t = global_scope().find_var(var.name).get_tensor() + p = t._place() + if p.is_cpu_place(): + place = core.CPUPlace() + elif p.is_cuda_pinned_place(): + place = core.CUDAPinnedPlace() + else: + p = core.Place() + p.set_place(t._place()) + place = core.CUDAPlace(p.gpu_device_id()) + t.set(ndarray, place) + + executor = Executor(_get_device())._default_executor + # restore parameter states + core._create_loaded_parameter( + [param for param, state in matched_param_state], + global_scope(), executor) + for param, state in matched_param_state: + _set_var(param, state) + + # [aliases] Compatible with old method names + set_dict = set_state_dict + load_dict = set_state_dict diff --git a/python/paddle/fluid/dygraph/learning_rate_scheduler.py b/python/paddle/fluid/dygraph/learning_rate_scheduler.py index cce383be7e22cd066199f814db80a75367862b82..cd6af6fd5b575e8188088bde9e8944ab94c7e0f8 100644 --- a/python/paddle/fluid/dygraph/learning_rate_scheduler.py +++ b/python/paddle/fluid/dygraph/learning_rate_scheduler.py @@ -97,7 +97,7 @@ class LearningRateDecay(object): """ self.keys = ['step_num'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. """ @@ -114,6 +114,9 @@ class LearningRateDecay(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) + # [aliases] Compatible with old method names + set_dict = set_state_dict + def step(self): raise NotImplementedError() diff --git a/python/paddle/fluid/dygraph/math_op_patch.py b/python/paddle/fluid/dygraph/math_op_patch.py index bb55c6725e6a62f2cef393fd34b249c217be0c54..8c4109674200bf97354444f92f00b13e053152a0 100644 --- a/python/paddle/fluid/dygraph/math_op_patch.py +++ b/python/paddle/fluid/dygraph/math_op_patch.py @@ -41,7 +41,7 @@ def monkey_patch_math_varbase(): The difference is, in dygraph mode, use auto-generated op functions for better performance. """ - @no_grad() + @no_grad def create_tensor(value, dtype, shape): out = _varbase_creator(dtype=dtype) out = core.ops.fill_constant(out, 'dtype', dtype, 'shape', shape, diff --git a/python/paddle/fluid/dygraph/parallel.py b/python/paddle/fluid/dygraph/parallel.py index 6fbf3bfe76f6ef1f699ef34bb7efe60247c8531c..472022bced7e3e2dd11d301501ebaec75e5e412a 100644 --- a/python/paddle/fluid/dygraph/parallel.py +++ b/python/paddle/fluid/dygraph/parallel.py @@ -445,7 +445,7 @@ class DataParallel(layers.Layer): self._reshape_inplace(x=g_var, shape=g_shape) assert g_var.shape == g_shape - @no_grad() + @no_grad def apply_collective_grads(self): """ AllReduce the Parameters' gradient. @@ -587,12 +587,13 @@ class DataParallel(layers.Layer): include_sublayers=include_sublayers, structured_name_prefix=structured_name_prefix) - def set_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): + @framework.deprecate_stat_dict + def set_state_dict(self, + state_dict, + include_sublayers=True, + use_structured_name=True): ''' - Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict + Set parameters of self._layers from state_dict. All the parameters of self._layers will be reset by the tensor in the state_dict Parameters: state_dict(dict) : Dict contains all the parameters @@ -605,62 +606,27 @@ class DataParallel(layers.Layer): Examples: .. 
code-block:: python - import paddle.fluid as fluid - with fluid.dygraph.guard(): - strategy=fluid.dygraph.prepare_context() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb, strategy) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - - emb.set_dict( para_state_dict ) + import paddle - ''' - - self._layers.set_dict( - stat_dict, - include_sublayers=include_sublayers, - use_structured_name=use_structured_name) - - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters of self._layers from stat_dict. All the parameters of self._layers will be reset by the tensor in the stat_dict - - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters - include_sublayers(bool, optional) : If true, also include the parameters from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter name as key. - Default: True - Returns: - None + paddle.disable_static() - Examples: - .. code-block:: python + emb = paddle.nn.Embedding([10, 10]) + emb = fluid.dygraph.DataParallel(emb, strategy) - import paddle.fluid as fluid - with fluid.dygraph.guard(): - strategy=fluid.dygraph.prepare_context() - emb = fluid.dygraph.Embedding([10, 10]) - emb = fluid.dygraph.DataParallel(emb, strategy) + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") + para_state_dict, _ = paddle.load("paddle_dy") - emb.load_dict( para_state_dict ) + emb.set_state_dict(para_state_dict) ''' - self._layers.load_dict( - stat_dict, + self._layers.set_state_dict( + state_dict, include_sublayers=include_sublayers, use_structured_name=use_structured_name) + + # [aliases] Compatible with old method names + set_dict = set_state_dict + load_dict = set_state_dict diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index fc4e91aad4fff1db325e17828d26ccd94c164c3d..5281df9ead10acea5ae8656dcc4a0eed14fb3e83 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -36,6 +36,7 @@ from . import core from . 
import unique_name import paddle.version as fluid_version import warnings +import functools __all__ = [ 'Program', @@ -238,6 +239,25 @@ def _fake_interface_only_(func): return __impl__ +# NOTE(chenweihang): There is argument name typo (stat_dict, correct name is state_dict) +# in fluid api Layer.set_dict, Optimizer.load, in order to correct the argument without +# introducing compatibility issues, add this decorator +# NOTE(chenweihang): not using `wrap_decorator` here is because `wrap_decorator` will +# move kwargs to args, which doesn't work in this decorate case +def deprecate_stat_dict(func): + @functools.wraps(func) + def wrapper(*args, **kwargs): + if 'stat_dict' in kwargs: + warnings.warn( + "The argument `stat_dict` has deprecated, please change it to `state_dict`.", + DeprecationWarning) + kwargs['state_dict'] = kwargs['stat_dict'] + kwargs.pop('stat_dict') + return func(*args, **kwargs) + + return wrapper + + dygraph_not_support = wrap_decorator(_dygraph_not_support_) dygraph_only = wrap_decorator(_dygraph_only_) fake_interface_only = wrap_decorator(_fake_interface_only_) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py index 378c8fc23d7528766ca9eca062c87a4511e32b46..216478479a7cfdcffac5f21855d0974309842c89 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/public.py @@ -42,6 +42,9 @@ op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() LR_SCHED_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.LRSched OPT_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.Optimize +SPARSE_OP_LIST = ["lookup_table", "lookup_table_v2"] +SPARSE_OP_TYPE_DICT = {"lookup_table": "W", "lookup_table_v2": "W"} + def _get_lr_ops(program): lr_ops = [] @@ -66,7 +69,7 @@ def _has_global_step(lr_ops): def is_sparse_op(op): - if op.type == "lookup_table" and op.attr('is_sparse') is True and op.attr( + if op.type in SPARSE_OP_LIST and op.attr('is_sparse') is True and op.attr( 'is_distributed') is False: return True @@ -78,7 +81,7 @@ def is_sparse_op(op): def is_distributed_sparse_op(op): - if op.type == "lookup_table" and op.attr('is_distributed') is True: + if op.type in SPARSE_OP_LIST and op.attr('is_distributed') is True: return True if op.type == "distributed_lookup_table" and op.attr( @@ -802,11 +805,10 @@ class CompileTimeStrategy(object): def _get_sparse_varnames(): varnames = [] - op_types = {"lookup_table": "W"} for op in origin_program.global_block().ops: - if op.type in op_types.keys() \ + if op.type in SPARSE_OP_TYPE_DICT.keys() \ and op.attr('remote_prefetch') is True: - param_name = op.input(op_types[op.type])[0] + param_name = op.input(SPARSE_OP_TYPE_DICT[op.type])[0] varnames.append(param_name) return list(set(varnames)) diff --git a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py index 201b3863a4b6d6d5fed036d85b2103f5defe61f0..4543af9820e8c9326098fa254494ca1c896d3b12 100644 --- a/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py +++ b/python/paddle/fluid/incubate/fleet/parameter_server/ir/trainer_pass.py @@ -1,3 +1,4 @@ +# -*- coding: UTF-8 -*- # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
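(A minimal sketch, not part of the patch, of what the new `deprecate_stat_dict` decorator above enables: the old `stat_dict` keyword keeps working on `set_state_dict` but now emits a DeprecationWarning. The `emb` layer follows the docstring examples elsewhere in this change.)

.. code-block:: python

    import paddle

    paddle.disable_static()
    emb = paddle.nn.Embedding([10, 10])
    state = emb.state_dict()

    emb.set_state_dict(state_dict=state)   # preferred spelling
    emb.set_state_dict(stat_dict=state)    # still works, warns via deprecate_stat_dict
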
# # Licensed under the Apache License, Version 2.0 (the "License"); @@ -40,6 +41,8 @@ LR_SCHED_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.LRSched OPT_OP_ROLE_ATTR_VALUE = core.op_proto_and_checker_maker.OpRole.Optimize op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName() +SPARSE_OP_TYPE_DICT = {"lookup_table": "W", "lookup_table_v2": "W"} + DEVICE_LIST = ["cpu", "gpu", "xpu"] COMMUNICATE_OPS_TYPE = ["send", "recv", "fetch_barrier", "send_barrier"] DEFAULT_DEVICE = 'cpu' @@ -81,11 +84,10 @@ def distributed_ops_pass(program, config): def _get_pull_sparse_ops(_program): pull_sparse_ops = {} - op_types = {"lookup_table": "W"} for op in _program.global_block().ops: - if op.type in op_types.keys() \ + if op.type in SPARSE_OP_TYPE_DICT.keys() \ and op.attr('remote_prefetch') is True: - param_name = op.input(op_types[op.type])[0] + param_name = op.input(SPARSE_OP_TYPE_DICT[op.type])[0] ops = pull_sparse_ops.get(param_name, []) ops.append(op) pull_sparse_ops[param_name] = ops @@ -101,6 +103,7 @@ def distributed_ops_pass(program, config): w = program.global_block().vars[ops[0].input("W")[0]] padding_idx = ops[0].attr("padding_idx") is_distributed = ops[0].attr("is_distributed") + op_type = ops[0].type outputs = [ program.global_block().vars[op.output("Out")[0]] for op in ops @@ -149,7 +152,8 @@ def distributed_ops_pass(program, config): "is_distributed": is_distributed, "pserver_num": len(pserver_endpoints), "padding_idx": padding_idx, - "trainer_id": trainer_id + "trainer_id": trainer_id, + "lookup_table_version": op_type }) else: raise ValueError( @@ -438,7 +442,23 @@ def find_heter_ops(program, default_device="cpu"): def create_heter_program(program, config, heter_program, heter_ops, block_var_detail, current_device): - # add heter op + + # This function mainly includes the following contents: + # 1. For every heter block: + # a) copy heter device op from origin program + # b) create variables which belong to heter op: + # -> if variable is persistable, clone it in global_scope + # -> if variable is temp, create it in heter block + # c) create communicate related op as follow: + # joint_var.0_1 -> slice -> reshape -> origin_var + # origin_var -> origin_program + # reshape -> concat -> joint_var.1_2 + # d) copy send op from origin program for var@grad which loacted in current heter block + # e) re-check every op in current blcok if its device is not current heter devie + # 2. Create send op for step counter in last heter-block + # 3. Create Listen&Serv OP for distributed training + # 4. 
update CompileTimeStrategy for heter_program + optimizer_block = [] grad_to_block_id = [] send_grad_var_list = [] @@ -450,17 +470,10 @@ def create_heter_program(program, config, heter_program, heter_ops, for _, op in enumerate(heter_block_ops): block_append_op(heter_program, program, heter_block, op) - # add relate variables - inputs = _get_input_map_from_op(program.global_block().vars, op) - add_vars_by_op_map(inputs, heter_program) - - outputs = _get_output_map_from_op(program.global_block().vars, op) - add_vars_by_op_map(outputs, heter_program) - entrance_vars = block_var_detail[index]["entrance"] - add_vars_by_var_list(entrance_vars, program, heter_program) + add_vars_by_var_list(entrance_vars, program, heter_program, heter_block) exit_vars = block_var_detail[index]["exit"] - add_vars_by_var_list(exit_vars, program, heter_program) + add_vars_by_var_list(exit_vars, program, heter_program, heter_block) comm_info = get_communicate_var_info(program, index, entrance_vars, exit_vars) @@ -468,13 +481,13 @@ def create_heter_program(program, config, heter_program, heter_ops, grad_to_block_id.append(comm_info["block_input_var_name"] + ":" + str( heter_block.idx)) - # create slice op first_op_index = 0 get_type_var_name = comm_info["input_var_reshape_name"][0].split( ".input_reshape@Heter")[0] - get_type_var = heter_program.global_block().vars[get_type_var_name] + get_type_var = heter_block.vars[get_type_var_name] + # create slice op insert_recv_slice_op( heter_program, heter_block, first_op_index, comm_info["block_input_var_name"], @@ -484,6 +497,13 @@ def create_heter_program(program, config, heter_program, heter_ops, for i in range(len(comm_info["input_var_reshape_dim"])) ]) first_op_index += len(comm_info["input_var_reshape_dim"]) + + heter_program.global_block().create_var( + name=comm_info["block_input_var_name"], + shape=(-1, sum(comm_info["input_var_reshape_dim"])), + dtype=get_type_var.dtype, + type=get_type_var.type) + # create reshape op for i in range(len(comm_info["input_var_reshape_name"])): var_name = entrance_vars[i] @@ -511,13 +531,14 @@ def create_heter_program(program, config, heter_program, heter_ops, comm_info["block_output_var_name"], [-1, sum(comm_info["output_var_reshape_dim"])]) check_op_device(heter_block, current_device) + + # add send op send_grad_var_list = send_grad_var_list + add_heter_send_op( program, heter_program, heter_block, block_var_detail[index]) # add step conter send_input_vars = [] dummy_output = [] - trainer_id = config.get_role_id() pserver_endpoints = config.get_ps_endpoints() optimizer_block[-1].append_op( type="send", @@ -552,7 +573,6 @@ def create_heter_program(program, config, heter_program, heter_ops, # append the listen_and_serv op heter_program.global_block().append_op( type="listen_and_serv", inputs={'X': []}, outputs={}, attrs=attrs) - check_heter_compile_time_strategy(program, config, send_grad_var_list) @@ -571,6 +591,16 @@ def check_heter_compile_time_strategy(program, config, send_grad_var_list): def create_trainer_program(program, config, heter_ops, block_var_detail): + # This function mainly includes the following contents: + # 1. For every heter block in origin program + # a) delete heter op and related variables + # b) add send&recv op + # c) add communicate ops as follows: + # origin_var -> reshape -> concat -> joint_var.0_1 + # send&recv op(send joint_var.0_1; recv joint_var.1_2) + # joint_var.1_2 -> slice -> reshape -> origin_var + # d) remove send op which related var@grad is not in trainer program + # 2. 
check every op's device for device in heter_ops.keys(): for heter_block_index in sorted(heter_ops[device]): replace_ops_by_communicate_op(program, config, heter_block_index, @@ -929,19 +959,19 @@ def insert_reshape_op(program, var_name, new_var_name, new_var_shape=None): - input_var = program.global_block().vars[var_name] + input_var = block.vars[var_name] - if new_var_name not in program.global_block().vars: - out = program.global_block().create_var( + if new_var_name not in block.vars: + out = block.create_var( name=new_var_name, shape=new_var_shape, dtype=input_var.dtype, type=input_var.type) else: - out = program.global_block().vars[new_var_name] + out = block.vars[new_var_name] new_var_shape = out.shape - x_shape = program.global_block().create_var( + x_shape = block.create_var( name="{}.xshape@Heter".format(var_name), dtype=input_var.dtype) block._insert_op( index=index, @@ -954,9 +984,7 @@ def insert_reshape_op(program, def insert_send_concat_op(program, block, index, var_name_list, new_var_name, new_var_shape): - input_var_list = [ - program.global_block().vars[var_name] for var_name in var_name_list - ] + input_var_list = [block.vars[var_name] for var_name in var_name_list] out = program.global_block().create_var( name=new_var_name, @@ -984,14 +1012,14 @@ def insert_recv_slice_op(program, block, index, var_name, var_shape, dtype, out_list = [] for i in range(len(new_var_name_list)): - if new_var_name_list[i] not in program.global_block().vars: - out = program.global_block().create_var( + if new_var_name_list[i] not in block.vars: + out = block.create_var( name=new_var_name_list[i], shape=new_var_shape_list[i], dtype=input_var.dtype, type=input_var.type) else: - out = program.global_block().vars[new_var_name_list[i]] + out = block.vars[new_var_name_list[i]] out_list.append(out) start_index = 0 @@ -1034,21 +1062,33 @@ def deleter_trainer_useless_var(program): def block_append_op(program, origin_program, block, op): - inputs = _get_input_map_from_op(origin_program.global_block().vars, op) + merge_ordereddict = origin_program.global_block().vars.copy() + merge_ordereddict.update(block.vars) + inputs = _get_input_map_from_op(merge_ordereddict, op) for key, varlist in six.iteritems(inputs): if not isinstance(varlist, list): varlist = [varlist] for var in varlist: - if var.name not in program.global_block().vars: - program.global_block()._clone_variable(var) + if var.name not in program.global_block( + ).vars and var.name not in block.vars: + if var.persistable: + program.global_block()._clone_variable( + var, force_persistable=False) + else: + block._clone_variable(var, force_persistable=False) outputs = _get_output_map_from_op(origin_program.global_block().vars, op) for key, varlist in six.iteritems(outputs): if not isinstance(varlist, list): varlist = [varlist] for var in varlist: - if var.name not in program.global_block().vars: - program.global_block()._clone_variable(var) + if var.name not in program.global_block( + ).vars and var.name not in block.vars: + if var.persistable: + program.global_block()._clone_variable( + var, force_persistable=False) + else: + block._clone_variable(var, force_persistable=False) if "_grad" not in op.type: # for forward op @@ -1073,21 +1113,15 @@ def block_append_op(program, origin_program, block, op): block._sync_with_cpp() -def add_vars_by_op_map(var_map, program): - for key, varlist in six.iteritems(var_map): - if not isinstance(varlist, list): - varlist = [varlist] - for i in range(len(varlist)): - var = varlist[i] - if var.name not in 
program.global_block().vars: - program.global_block()._clone_variable(var) - - -def add_vars_by_var_list(var_name_list, origin_program, program): +def add_vars_by_var_list(var_name_list, origin_program, program, block): for var_name in var_name_list: if var_name not in program.global_block().vars: var = origin_program.global_block().vars[var_name] - program.global_block()._clone_variable(var) + if var.persistable: + program.global_block()._clone_variable( + var, force_persistable=False) + else: + block._clone_variable(var, force_persistable=False) def get_varlist_from_op_map(var_map): diff --git a/python/paddle/fluid/input.py b/python/paddle/fluid/input.py index 15a3022f932f4a702bf7f94ed936468b6a06e94e..529588c0846b5a90a842c398bbb4409a04f35d53 100644 --- a/python/paddle/fluid/input.py +++ b/python/paddle/fluid/input.py @@ -129,6 +129,7 @@ def one_hot(input, depth, allow_out_of_range=False): return one_hot_out +@deprecated(since='2.0.0', update_to='paddle.nn.functional.embedding') def embedding(input, size, is_sparse=False, diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 19822e410c71aa993e2d90a92c57c3522023ad81..db556913384785e1f11ba05dcc524ef1f1de92ab 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -147,8 +147,10 @@ class LayerHelper(LayerHelperBase): if 'use_cudnn' in self.kwargs and self.kwargs.get('use_cudnn'): act['use_cudnn'] = self.kwargs.get('use_cudnn') - if 'use_mkldnn' in self.kwargs: - act['use_mkldnn'] = self.kwargs.get('use_mkldnn') + use_mkldnn = self.kwargs.get( + 'use_mkldnn', core.globals().get("FLAGS_use_mkldnn", False)) + if use_mkldnn: + act['use_mkldnn'] = use_mkldnn act_type = act.pop('type') tmp = self.create_variable_for_type_inference(dtype=input_var.dtype) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 19c46fd21b1cda5f6f3155250fb953ce9a962bb2..9313de8c64fcf4efc1e192ad2826f05f51869bbf 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -367,6 +367,7 @@ def fc(input, return helper.append_activation(pre_activation) +@deprecated(since="2.0.0", update_to="paddle.nn.functional.embedding") def embedding(input, size, is_sparse=False, @@ -4814,11 +4815,6 @@ def split(input, num_or_sections, dim=-1, name=None): Returns: list(Tensor): The list of segmented Tensors. - Raises: - TypeError: The data type of ``input`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: ``num_or_sections`` is not int, list or tuple. - TypeError: ``dim`` is not int or Tensor. The data type of ``dim`` must be int32 or int64 when it's a Tensor. - Example: .. code-block:: python @@ -6102,11 +6098,6 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=False, name=None): Returns: Tensor: A reshaped Tensor with the same data type as ``x``. It is a new tensor variable if ``inplace`` is ``False``, otherwise it is ``x``. If ``act`` is None, return the reshaped tensor variable, otherwise return the activated tensor variable. - Raises: - TypeError: If actual_shape is neither Tensor nor None. - ValueError: If more than one elements of ``shape`` is -1. - ValueError: If the element of ``shape`` is 0, the corresponding dimension should be less than or equal to the dimension of ``x``. - ValueError: If the elements in ``shape`` is negative except -1. Examples: .. 
code-block:: python @@ -8255,10 +8246,6 @@ def gather(input, index, overwrite=True): Returns: output (Tensor): The output is a tensor with the same rank as input. - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must to be one of float16, float32, float64, int32, int64, uint8. - TypeError: ``index`` must be a Tensor and the data type of ``index`` must be int32 or int64. - Examples: .. code-block:: python @@ -8348,10 +8335,6 @@ def gather_nd(input, index, name=None): Returns: output (Tensor): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] - - Raises: - TypeError: ``input`` must be a Tensor and the data type of ``input`` must be one of float32, float64, int32 and int64. - TypeError: ``index`` must be a Tensor and the data type of ``index`` must be one of int32 and int64. Examples: @@ -10017,15 +10000,16 @@ def stack(x, axis=0, name=None): Args: - x (Variable|list(Variable)): Input :code:`x` can be a single Tensor, a :code:`list` of Tensors. - If :code:`x` is a :code:`list`, the shapes of all these Tensors + x (list(Variable)|tuple(Variable)): Input :code:`x` can be a :code:`list` or :code:`tuple` of Tensors, the shapes of all these Tensors must be the same. Supposing input is N dims Tensors :math:`[d_0, d_1, ..., d_{n-1}]`, the output is N+1 dims Tensor :math:`[d_0, d_1, d_{axis-1}, len(x), d_{axis}, ..., d_{n-1}]`. Supported data types: float32, float64, int32, int64. - axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is :math:`[-(R+1), R+1)`. - R is the first tensor of inputs. If ``axis`` < 0, :math:`axis=axis+rank(x[0])+1`. - The default value of axis is 0. + axis (int, optional): The axis along which all inputs are stacked. ``axis`` range is ``[-(R+1), R+1)``, + where ``R`` is the number of dimensions of the first input tensor ``x[0]``. + If ``axis < 0``, ``axis = axis+R+1``. The default value of axis is 0. + name (str, optional): Please refer to :ref:`api_guide_Name`, Default None. + Returns: Variable: The stacked Tensor, has same data type with input Tensors. Output dim is :math:`rank(x[0])+1`. @@ -10043,18 +10027,27 @@ def stack(x, axis=0, name=None): data = layers.stack([x1,x2], axis=1) # stack according to axis 1, data.shape=[None, 2, 1, 2] - # stack single Tensor - data = layers.stack(x1) # stack according to axis 0, data.shape=[1, None, 1, 2] """ axis = 0 if axis is None else axis - if not isinstance(x, list) and not isinstance(x, tuple): - x = [x] if in_dygraph_mode(): return core.ops.stack(x, 'axis', axis) + if not isinstance(x, list) and not isinstance(x, tuple): + # NOTE:(zhiqiu) Only support Variable as input if the Variable is a LOD_TENSOR_ARRAY create by create_array, array_write, array_read, etc. + # In that case, Variable is array of tensors indeed. 
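(A brief, illustrative sketch of the behavioural change to `stack` above: a bare Tensor input is no longer wrapped into a list, except for LoDTensorArray variables created by `create_array` / `array_write`.)

.. code-block:: python

    import paddle.fluid as fluid
    import paddle.fluid.layers as layers

    x1 = fluid.data(name='x1', shape=[None, 1, 2], dtype='float32')
    x2 = fluid.data(name='x2', shape=[None, 1, 2], dtype='float32')

    out = layers.stack([x1, x2], axis=1)   # list/tuple of Tensors: supported
    # layers.stack(x1)                     # single Tensor: now raises TypeError
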
+ if isinstance(x, Variable) and x.desc.type( + ) == core.VarDesc.VarType.LOD_TENSOR_ARRAY: + x = [x] + else: + raise TypeError("The type of '%s' in %s must be %s, but received %s" + % ('x', 'stack', + 'list[Tensor], tuple[Tensor] or TensorArray', + type(x))) + helper = LayerHelper('stack', **locals()) + out = helper.create_variable_for_type_inference(x[0].dtype) if x[0].desc.type() == core.VarDesc.VarType.LOD_TENSOR_ARRAY: assert len(x) == 1, "If the elements of 'x' in stack are Variable(LoDTensorArray), " \ @@ -10599,7 +10592,7 @@ def gaussian_random(shape, dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) return core.ops.gaussian_random('shape', shape, 'mean', float(mean), 'std', float(std), 'seed', seed, 'dtype', @@ -10616,7 +10609,7 @@ def gaussian_random(shape, 'dtype': dtype, 'use_mkldnn': False } - utils._get_shape_tensor_inputs( + utils.get_shape_tensor_inputs( inputs=inputs, attrs=attrs, shape=shape, @@ -12030,6 +12023,8 @@ for func in [ elementwise_floordiv, ]: op_proto = OpProtoHolder.instance().get_op_proto(func.__name__) + + # insert the c++ doc string on top of python doc string func.__doc__ = _generate_doc_string_( op_proto, additional_args_lines=[ @@ -12047,6 +12042,16 @@ for func in [ "mkldnn_data_type", "Scale_x", "Scale_y", "Scale_out" }) + """\n""" + str(func.__doc__) + doc_list = func.__doc__.splitlines() + + for idx, val in enumerate(doc_list): + if val.startswith("Warning: ") and val.endswith( + " instead." + ) and "and will be removed in future versions." in val: + doc_list.insert(0, doc_list.pop(idx)) + func.__doc__ = "\n" + "\n".join(i for i in doc_list) + break + for func in []: op_proto = OpProtoHolder.instance().get_op_proto(func.__name__) func.__doc__ = _generate_doc_string_( @@ -12152,13 +12157,10 @@ def logical_and(x, y, out=None, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x_data = np.array([True], dtype=np.bool) - y_data = np.array([True, False, True, False], dtype=np.bool) - x = paddle.to_tensor(x_data) - y = paddle.to_tensor(y_data) + x = paddle.to_tensor([True]) + y = paddle.to_tensor([True, False, True, False]) res = paddle.logical_and(x, y) print(res.numpy()) # [True False True False] """ @@ -12271,11 +12273,9 @@ def logical_not(x, out=None, name=None): Examples: .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() - x_data = np.array([True, False, True, False], dtype=np.bool) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([True, False, True, False]) res = paddle.logical_not(x) print(res.numpy()) # [False True False True] """ @@ -15093,7 +15093,7 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) return core.ops.uniform_random('shape', shape, 'min', float(min), 'max', float(max), 'seed', seed, 'dtype', dtype) @@ -15103,7 +15103,7 @@ def uniform_random(shape, dtype='float32', min=-1.0, max=1.0, seed=0, inputs = dict() attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} - utils._get_shape_tensor_inputs( + utils.get_shape_tensor_inputs( inputs=inputs, attrs=attrs, shape=shape, op_type='uniform_random/rand') helper = LayerHelper("uniform_random", **locals()) diff --git a/python/paddle/fluid/layers/ops.py b/python/paddle/fluid/layers/ops.py index 84cacea6ba5723f8a06fc87fa9c59d96f802e65a..1efae3ddf1f3422a53f69c4b5b8eeec6183fae96 100644 --- a/python/paddle/fluid/layers/ops.py +++ b/python/paddle/fluid/layers/ops.py @@ -86,13 +86,11 @@ add_sample_code(globals()["sigmoid"], r""" Examples: .. code-block:: python - import numpy as np import paddle import paddle.nn.functional as F paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = F.sigmoid(x) print(out.numpy()) # [0.40131234 0.450166 0.52497919 0.57444252] @@ -103,13 +101,11 @@ add_sample_code(globals()["logsigmoid"], r""" Examples: .. code-block:: python - import numpy as np import paddle import paddle.nn.functional as F paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = F.logsigmoid(x) print(out.numpy()) # [-0.91301525 -0.79813887 -0.64439666 -0.55435524] @@ -120,12 +116,10 @@ add_sample_code(globals()["exp"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.exp(x) print(out.numpy()) # [0.67032005 0.81873075 1.10517092 1.34985881] @@ -136,12 +130,10 @@ add_sample_code(globals()["tanh"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.tanh(x) print(out.numpy()) # [-0.37994896 -0.19737532 0.09966799 0.29131261] @@ -152,12 +144,10 @@ add_sample_code(globals()["atan"], r""" Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.atan(x) print(out.numpy()) # [-0.38050638 -0.19739556 0.09966865 0.29145679] @@ -170,11 +160,10 @@ Examples: import paddle import paddle.nn.functional as F - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = F.tanhshrink(x) # [-0.020051, -0.00262468, 0.000332005, 0.00868739] """) @@ -183,12 +172,10 @@ add_sample_code(globals()["sqrt"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([0.1, 0.2, 0.3, 0.4]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4]) out = paddle.sqrt(x) print(out.numpy()) # [0.31622777 0.4472136 0.54772256 0.63245553] @@ -199,12 +186,10 @@ add_sample_code(globals()["rsqrt"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([0.1, 0.2, 0.3, 0.4]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([0.1, 0.2, 0.3, 0.4]) out = paddle.rsqrt(x) print(out.numpy()) # [3.16227766 2.23606798 1.82574186 1.58113883] @@ -215,12 +200,10 @@ add_sample_code(globals()["abs"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.abs(x) print(out.numpy()) # [0.4 0.2 0.1 0.3] @@ -231,12 +214,10 @@ add_sample_code(globals()["ceil"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.ceil(x) print(out.numpy()) # [-0. -0. 1. 1.] @@ -247,12 +228,10 @@ add_sample_code(globals()["floor"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.floor(x) print(out.numpy()) # [-1. -1. 0. 0.] @@ -263,12 +242,10 @@ add_sample_code(globals()["cos"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.cos(x) print(out.numpy()) # [0.92106099 0.98006658 0.99500417 0.95533649] @@ -279,12 +256,10 @@ add_sample_code(globals()["acos"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.acos(x) print(out.numpy()) # [1.98231317 1.77215425 1.47062891 1.26610367] @@ -295,12 +270,10 @@ add_sample_code(globals()["sin"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.sin(x) print(out.numpy()) # [-0.38941834 -0.19866933 0.09983342 0.29552021] @@ -311,12 +284,10 @@ add_sample_code(globals()["asin"], r""" Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.asin(x) print(out.numpy()) # [-0.41151685 -0.20135792 0.10016742 0.30469265] @@ -327,12 +298,10 @@ add_sample_code(globals()["cosh"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.cosh(x) print(out.numpy()) # [1.08107237 1.02006676 1.00500417 1.04533851] @@ -343,12 +312,10 @@ add_sample_code(globals()["sinh"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.sinh(x) print(out.numpy()) # [-0.41075233 -0.201336 0.10016675 0.30452029] @@ -359,12 +326,10 @@ add_sample_code(globals()["round"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.5, -0.2, 0.6, 1.5]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.5, -0.2, 0.6, 1.5]) out = paddle.round(x) print(out.numpy()) # [-1. -0. 1. 2.] @@ -375,12 +340,10 @@ add_sample_code(globals()["reciprocal"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.reciprocal(x) print(out.numpy()) # [-2.5 -5. 10. 3.33333333] @@ -391,12 +354,10 @@ add_sample_code(globals()["square"], r""" Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_variable(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.square(x) print(out.numpy()) # [0.16 0.04 0.01 0.09] @@ -409,11 +370,10 @@ Examples: import paddle import paddle.nn.functional as F - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = F.softplus(x) # [0.513015, 0.598139, 0.744397, 0.854355] """) @@ -424,11 +384,10 @@ Examples: import paddle import paddle.nn.functional as F - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([-0.4, -0.2, 0.1, 0.3])) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = F.softsign(x) # [-0.285714, -0.166667, 0.0909091, 0.230769] """) @@ -761,11 +720,9 @@ Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_tensor(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.erf(x) print(out.numpy()) # [-0.42839236 -0.22270259 0.11246292 0.32862676] diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 77a78eb4a14a0a5ad9be9cff71131ca473106ab8..a90551c1b7b4fd45ae9a0e1cfa225a87db811295 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -29,6 +29,7 @@ from ..data_feeder import check_variable_and_dtype, check_type, check_dtype, con from paddle.utils import deprecated import numpy import warnings +from .utils import check_shape __all__ = [ 'create_tensor', 'create_parameter', 'create_global_var', 'cast', @@ -276,11 +277,6 @@ def concat(input, axis=0, name=None): name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. - Raises: - TypeError: ``input`` must be one of list, tuple or Tensor. - TypeError: The data type of ``input`` must be one of bool, float16, float32, float64, int32 and int64. - TypeError: The ``axis`` must be int or Tensor. The dtype of ``axis`` must be int32 or int64 when it's a Tensor. - TypeError: All the Tensors in ``input`` must have the same data type. Returns: Tensor: A Tensor with the same data type as ``input``. @@ -657,12 +653,6 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): Returns: Tensor: Tensor which is created according to shape and dtype. - Raises: - TypeError: The dtype must be one of bool, float16, float32, float64, int32 and int64 - and the data type of ``out`` must be the same as the ``dtype``. - TypeError: The shape must be one of list, tuple and Tensor, the data type of ``shape`` - must be int32 or int64 when ``shape`` is a Tensor - Examples: .. code-block:: python @@ -694,7 +684,7 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): attrs['str_value'] = str(float(value)) if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) if out is None: out = _varbase_creator(dtype=dtype) @@ -718,20 +708,18 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None, name=None): value = cast(value, dtype) inputs['ValueTensor'] = value + check_shape(shape) check_dtype(dtype, 'dtype', ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'], 'fill_constant') check_type(shape, 'shape', (Variable, list, tuple), 'fill_constant') - if isinstance(shape, Variable): - check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'fill_constant') - if out is not None: check_variable_and_dtype(out, 'out', [convert_dtype(dtype)], 'fill_constant') helper = LayerHelper("fill_constant", **locals()) - utils._get_shape_tensor_inputs( + utils.get_shape_tensor_inputs( inputs=inputs, attrs=attrs, shape=shape, op_type='fill_constant') if out is None: @@ -1050,10 +1038,6 @@ def ones(shape, dtype, force_cpu=False): Returns: Tensor: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 1. - Raises: - TypeError: The ``dtype`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: The ``shape`` must be one of list, tuple and Tensor. The data type of ``shape`` must - be int32 or int64 when it's a Tensor. Examples: .. 
code-block:: python @@ -1086,10 +1070,6 @@ def zeros(shape, dtype, force_cpu=False, name=None): Returns: Tensor: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 0. - Raises: - TypeError: The ``dtype`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: The ``shape`` must be one of list, tuple and Tensor. The data type of ``shape`` must - be int32 or int64 when it's a Tensor. Examples: .. code-block:: python @@ -1453,14 +1433,6 @@ def linspace(start, stop, num, dtype=None, name=None): the data shape of this tensor is :math:`[num]` . If the :attr:`num` is set 1, the output tensor just has \ the value with input :attr:`start`. - Raises: - TypeError: The ``dtype`` must be one of int32, int64, float32 and float64. - TypeError: The type of ``num`` must be int When it's not a Tensor. - TypeError: The data type of ``num`` must be int32 When it's a Tensor. - TypeError: The data type of ``start`` and ``stop`` must be same as ``dtype`` When it's a Tensor. - - - Examples: .. code-block:: python @@ -1474,6 +1446,8 @@ def linspace(start, stop, num, dtype=None, name=None): tensor_num = num tensor_start = start tensor_stop = stop + if not isinstance(num, Variable): + check_type(num, 'num', (int), 'linspace') if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if not isinstance(start, Variable): @@ -1488,21 +1462,32 @@ def linspace(start, stop, num, dtype=None, name=None): helper = LayerHelper("linspace", **locals()) + start_dtype = convert_dtype(tensor_start.dtype) + stop_dtype = convert_dtype(tensor_stop.dtype) + out_dtype = convert_dtype(dtype) if isinstance(start, Variable): - check_dtype(start.dtype, 'start', (convert_dtype(dtype)), 'linspace') + check_dtype(start.dtype, 'start', + ['float32', 'float64', 'int32', 'int64'], 'linspace') else: check_type(start, 'start', (int, float), 'linspace') if isinstance(stop, Variable): - check_dtype(stop.dtype, 'stop', (convert_dtype(dtype)), 'linspace') + check_dtype(stop.dtype, 'stop', + ['float32', 'float64', 'int32', 'int64'], 'linspace') else: check_type(stop, 'stop', (int, float), 'linspace') if isinstance(num, Variable): check_dtype(num.dtype, 'num', ['int32'], 'linspace') - else: - check_type(num, 'num', (int), 'linspace') check_dtype(dtype, 'dtype', ['int32', 'int64', 'float32', 'float64'], 'linspace') + if ((stop_dtype == "float64" or start_dtype == "float64") and + out_dtype in ["float32", "int32"]) or ((stop_dtype == "int64" or + start_dtype == "int64") and + out_dtype == "int32"): + raise ValueError( + "The dtype of start/stop is {}/{} but the attr(dtype) of linspace is {}, " + "which may cause data type overflows. Please reset attr(dtype) of linspace." + .format(start_dtype, stop_dtype, dtype)) out = helper.create_variable_for_type_inference(dtype=dtype) @@ -1629,9 +1614,6 @@ def eye(num_rows, Returns: Tensor: An identity Tensor or LoDTensor of shape batch_shape + [num_rows, num_columns]. - Raises: - TypeError: The `dtype` must be one of float16, float32, float64, int32 and int64. - TypeError: The `num_columns` must be non-negative int. Examples: .. 
code-block:: python diff --git a/python/paddle/fluid/layers/utils.py b/python/paddle/fluid/layers/utils.py index 0d6965239e14b92d3d4997a9cf8efbe3fa7048b7..2095c9957e75b94396e573eba341f4cfded5dbc8 100644 --- a/python/paddle/fluid/layers/utils.py +++ b/python/paddle/fluid/layers/utils.py @@ -20,6 +20,7 @@ import numpy as np from ..framework import Variable from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype from ..layer_helper import LayerHelper +from sys import version_info def convert_to_list(value, n, name, dtype=np.int): @@ -282,7 +283,7 @@ def _contain_var(list_or_tuple): return False -def _get_shape_tensor_inputs(inputs, attrs, shape, op_type): +def get_shape_tensor_inputs(inputs, attrs, shape, op_type): from .tensor import fill_constant, cast def _get_attr_shape(list_shape): @@ -347,7 +348,7 @@ def _convert_to_tensor_list(old_list, dtype="int32"): return new_list_tensor -def _convert_shape_to_list(shape): +def convert_shape_to_list(shape): """ Convert shape(list, tuple, variable) to list in imperative mode """ @@ -358,3 +359,22 @@ def _convert_shape_to_list(shape): else: shape = list(shape.numpy().astype(int)) return shape + + +def check_shape(shape): + """ + Check shape type and shape elements type before passing it to fill_constant + """ + if isinstance(shape, Variable): + check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'fill_constant') + else: + for ele in shape: + if not isinstance(ele, Variable): + if ele < 0: + raise ValueError( + "All elements in ``shape`` must be positive when it's a list or tuple" + ) + if not isinstance(ele, six.integer_types): + raise TypeError( + "All elements in ``shape`` must be integers when it's a list or tuple" + ) diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 8f34576b836a5412a6792a6dfd63b3c9fd8de560..8b37cfef3890eace0ff5141eeb91d85e78f1c964 100644 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -61,7 +61,7 @@ class Optimizer(object): but need to use one of it's implementation. """ - @imperative_base.no_grad() + @imperative_base.no_grad def __init__(self, learning_rate, parameter_list=None, @@ -170,7 +170,7 @@ class Optimizer(object): return state_dict @framework.dygraph_only - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): ''' Load optimizer state dict. For Adam optimizer, contains beta1, beta2, momentum etc. If LearningRateDecay have been used, global_step will be changed. @@ -182,20 +182,22 @@ class Optimizer(object): Examples: .. 
code-block:: python - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) + import paddle + + paddle.disable_static() + + emb = paddle.nn.Embedding([10, 10]) - state_dict = emb.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + state_dict = emb.state_dict() + paddle.save(state_dict, "paddle_dy") - adam = fluid.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000), + adam = paddle.optimizer.Adam(learning_rate=fluid.layers.noam_decay( 100, 10000), parameter_list=emb.parameters()) - state_dict = adam.state_dict() - fluid.save_dygraph(state_dict, "paddle_dy") + state_dict = adam.state_dict() - para_state_dict, opti_state_dict = fluid.load_dygraph( "paddle_dy") + para_state_dict, opti_state_dict = paddle.load("paddle_dy") - adam.set_dict(opti_state_dict) + adam.set_state_dict(opti_state_dict) ''' from paddle.optimizer.lr_scheduler import _LRScheduler @@ -257,6 +259,9 @@ class Optimizer(object): tensor.set(load_para_np, framework._current_expected_place()) + # [aliases] Compatible with old method names + set_dict = set_state_dict + def get_opti_var_name_list(self): return self._opti_name_list @@ -897,7 +902,7 @@ class Optimizer(object): if p.trainable: p.clear_gradient() - @imperative_base.no_grad() + @imperative_base.no_grad def minimize(self, loss, startup_program=None, @@ -1015,7 +1020,7 @@ class SGDOptimizer(Optimizer): name=name) self.type = "sgd" - @no_grad() + @no_grad def _append_optimize_op(self, block, param_and_grad): lr = self._create_param_lr(param_and_grad) if framework.in_dygraph_mode(): @@ -1552,7 +1557,7 @@ class DGCMomentumOptimizer(Optimizer): dgc_op._set_attr(op_maker.kOpRoleVarAttrName(), [param_var.name, grad_var.name]) - @imperative_base.no_grad() + @imperative_base.no_grad def apply_gradients(self, params_grads): params_grads = sorted(params_grads, key=lambda x: x[0].name) params_grads, table_param_and_grad, table_optimize_op = \ @@ -4595,15 +4600,16 @@ class RecomputeOptimizer(Optimizer): ), "_checkpoints should be a list of Variable or a list of String" self._checkpoints = checkpoints - def load(self, stat_dict): + @framework.deprecate_stat_dict + def load(self, state_dict): """ - :api_attr: Static Graph + :api_attr: Static Graph load function is not supported by Recompute Optimizer for now. :return: None Args: - stat_dict: the dict load by load_persistable method + state_dict: the dict load by load_persistable method Examples: .. code-block:: python @@ -4627,8 +4633,8 @@ class RecomputeOptimizer(Optimizer): sgd = fluid.optimizer.RecomputeOptimizer(sgd) sgd._set_checkpoints([fc_1, pred]) try: - stat_dict = {} - sgd.load(stat_dict) + state_dict = {} + sgd.load(state_dict) except NotImplementedError as e: print(cpt.get_exception_message(e)) """ diff --git a/python/paddle/fluid/tests/demo/executor_train_dataset.py b/python/paddle/fluid/tests/demo/executor_train_dataset.py deleted file mode 100644 index 6938982de725c296aae29e70d0640749d0876353..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/executor_train_dataset.py +++ /dev/null @@ -1,96 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
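# --- annotation (hedged sketch, not part of the patch): the class-level alias added above,
# "set_dict = set_state_dict", keeps old call sites working while the public name changes.
# The pattern in isolation (_OptimizerLike and _opt are illustrative names only):
class _OptimizerLike(object):
    def set_state_dict(self, state_dict):
        self._state = dict(state_dict)

    # old spelling kept as a backward-compatible alias
    set_dict = set_state_dict

_opt = _OptimizerLike()
_opt.set_dict({"beta1_pow_acc": 0.9})  # still dispatches to set_state_dict
assert _opt._state == {"beta1_pow_acc": 0.9}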
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import tarfile -import paddle.fluid as fluid -import paddle -from paddle.fluid import core - -URL = 'http://paddle-unittest-data.gz.bcebos.com/python_paddle_fluid_tests_demo_async-executor/train_data.tar.gz' -MD5 = '2a405a31508969b3ab823f42c0f522ca' - - -def bow_net(data, - label, - dict_dim=89528, - emb_dim=128, - hid_dim=128, - hid_dim2=96, - class_dim=2): - """ - BOW net - This model is from https://github.com/PaddlePaddle/models: - models/fluid/PaddleNLP/text_classification/nets.py - """ - # embedding - emb = fluid.layers.embedding( - input=data, size=[dict_dim, emb_dim], is_sparse=True) - bow = fluid.layers.sequence_pool(input=emb, pool_type='sum') - bowh = fluid.layers.tanh(bow) - # fc layer after conv - fc_1 = fluid.layers.fc(input=bowh, size=hid_dim, act="tanh") - fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh") - # probability of each class - prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax") - # cross entropy loss - cost = fluid.layers.cross_entropy(input=prediction, label=label) - # mean loss - avg_cost = fluid.layers.mean(x=cost) - acc = fluid.layers.accuracy(input=prediction, label=label) - return avg_cost, acc, prediction - - -def train(): - # Download data - with tarfile.open(paddle.dataset.common.download(URL, "imdb", MD5)) as tarf: - tarf.extractall(path='./') - tarf.close() - - # Initialize dataset description - dataset = fluid.DatasetFactory().create_dataset() - dataset.set_batch_size(128) # See API doc for how to change other fields - - # define network - # input text data - data = fluid.layers.data( - name="words", shape=[1], dtype="int64", lod_level=1) - # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - dataset.set_use_var([data, label]) - avg_cost, acc, prediction = bow_net(data, label) - sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=0.002) - opt_ops, weight_and_grad = sgd_optimizer.minimize(avg_cost) - - # Run startup program - startup_program = fluid.default_startup_program() - place = fluid.CPUPlace() - executor = fluid.Executor(place) - executor.run(startup_program) - - main_program = fluid.default_main_program() - epochs = 10 - filelist = ["train_data/part-%d" % i for i in range(12)] - dataset.set_filelist(filelist) - for i in range(epochs): - dataset.set_thread(4) - executor.train_from_dataset( - main_program, # This can be changed during iteration - dataset, # This can be changed during iteration - debug=False) - fluid.io.save_inference_model('imdb/epoch%d.model' % i, - [data.name, label.name], [acc], executor) - - -if __name__ == "__main__": - train() diff --git a/python/paddle/fluid/tests/demo/fc_gan.py b/python/paddle/fluid/tests/demo/fc_gan.py deleted file mode 100644 index bd77779ce6ab5cf19e3e5ace3e51e39734b27c10..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/fc_gan.py +++ /dev/null @@ -1,173 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import print_function - -import errno -import math -import os - -import matplotlib -import numpy - -import paddle -import paddle.fluid as fluid - -matplotlib.use('Agg') -import matplotlib.pyplot as plt -import matplotlib.gridspec as gridspec - -NOISE_SIZE = 100 -NUM_PASS = 1000 -NUM_REAL_IMGS_IN_BATCH = 121 -NUM_TRAIN_TIMES_OF_DG = 3 -LEARNING_RATE = 2e-5 - - -def D(x): - hidden = fluid.layers.fc(input=x, - size=200, - act='relu', - param_attr='D.w1', - bias_attr='D.b1') - logits = fluid.layers.fc(input=hidden, - size=1, - act=None, - param_attr='D.w2', - bias_attr='D.b2') - return logits - - -def G(x): - hidden = fluid.layers.fc(input=x, - size=200, - act='relu', - param_attr='G.w1', - bias_attr='G.b1') - img = fluid.layers.fc(input=hidden, - size=28 * 28, - act='tanh', - param_attr='G.w2', - bias_attr='G.b2') - return img - - -def plot(gen_data): - gen_data.resize(gen_data.shape[0], 28, 28) - n = int(math.ceil(math.sqrt(gen_data.shape[0]))) - fig = plt.figure(figsize=(n, n)) - gs = gridspec.GridSpec(n, n) - gs.update(wspace=0.05, hspace=0.05) - - for i, sample in enumerate(gen_data): - ax = plt.subplot(gs[i]) - plt.axis('off') - ax.set_xticklabels([]) - ax.set_yticklabels([]) - ax.set_aspect('equal') - plt.imshow(sample.reshape(28, 28), cmap='Greys_r') - - return fig - - -def main(): - try: - os.makedirs("./out") - except OSError as e: - if e.errno != errno.EEXIST: - raise - - startup_program = fluid.Program() - d_program = fluid.Program() - dg_program = fluid.Program() - - with fluid.program_guard(d_program, startup_program): - img = fluid.layers.data(name='img', shape=[784], dtype='float32') - d_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=D(img), - label=fluid.layers.data( - name='label', shape=[1], dtype='float32')) - d_loss = fluid.layers.mean(d_loss) - - with fluid.program_guard(dg_program, startup_program): - noise = fluid.layers.data( - name='noise', shape=[NOISE_SIZE], dtype='float32') - g_img = G(x=noise) - g_program = dg_program.clone() - dg_loss = fluid.layers.sigmoid_cross_entropy_with_logits( - x=D(g_img), - label=fluid.layers.fill_constant_batch_size_like( - input=noise, dtype='float32', shape=[-1, 1], value=1.0)) - dg_loss = fluid.layers.mean(dg_loss) - - opt = fluid.optimizer.Adam(learning_rate=LEARNING_RATE) - - opt.minimize(loss=d_loss, startup_program=startup_program) - opt.minimize( - loss=dg_loss, - startup_program=startup_program, - parameter_list=[ - p.name for p in g_program.global_block().all_parameters() - ]) - exe = fluid.Executor(fluid.CPUPlace()) - exe.run(startup_program) - - num_true = NUM_REAL_IMGS_IN_BATCH - train_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.mnist.train(), buf_size=60000), - batch_size=num_true) - - for pass_id in range(NUM_PASS): - for batch_id, data in enumerate(train_reader()): - num_true = len(data) - n = numpy.random.uniform( - low=-1.0, high=1.0, - size=[num_true * NOISE_SIZE]).astype('float32').reshape( - [num_true, NOISE_SIZE]) - generated_img = exe.run(g_program, - feed={'noise': n}, - fetch_list={g_img})[0] - real_data = numpy.array([x[0] for x in 
data]).astype('float32') - real_data = real_data.reshape(num_true, 784) - total_data = numpy.concatenate([real_data, generated_img]) - total_label = numpy.concatenate([ - numpy.ones( - shape=[real_data.shape[0], 1], dtype='float32'), - numpy.zeros( - shape=[real_data.shape[0], 1], dtype='float32') - ]) - d_loss_np = exe.run(d_program, - feed={'img': total_data, - 'label': total_label}, - fetch_list={d_loss})[0] - for _ in range(NUM_TRAIN_TIMES_OF_DG): - n = numpy.random.uniform( - low=-1.0, high=1.0, - size=[2 * num_true * NOISE_SIZE]).astype('float32').reshape( - [2 * num_true, NOISE_SIZE, 1, 1]) - dg_loss_np = exe.run(dg_program, - feed={'noise': n}, - fetch_list={dg_loss})[0] - print("Pass ID={0}, Batch ID={1}, D-Loss={2}, DG-Loss={3}".format( - pass_id, batch_id, d_loss_np, dg_loss_np)) - # generate image each batch - fig = plot(generated_img) - plt.savefig( - 'out/{0}.png'.format(str(pass_id).zfill(3)), bbox_inches='tight') - plt.close(fig) - - -if __name__ == '__main__': - main() diff --git a/python/paddle/fluid/tests/demo/pipeline_train.py b/python/paddle/fluid/tests/demo/pipeline_train.py deleted file mode 100644 index 2f75908a160fd3c61c743dc407095d645737a534..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/pipeline_train.py +++ /dev/null @@ -1,205 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
- -import numpy as np -import copy -import pickle -import os -from functools import partial -import logging -import time -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers as layers -import argparse -import random -import sys -import math - -logging.basicConfig(format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger("fluid") -logger.setLevel(logging.INFO) - -is_profile = False - - -def parse_args(): - parser = argparse.ArgumentParser("Resnet with pipelie parallel.") - parser.add_argument( - '--batch_size', type=int, default=100, help='input batch size') - parser.add_argument('--lr', type=float, default=0.001, help='learning rate') - return parser.parse_args() - - -def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, - act=None): - conv = fluid.layers.conv2d( - input=input, - num_filters=num_filters, - filter_size=filter_size, - stride=stride, - padding=(filter_size - 1) // 2, - groups=groups, - act=None, - bias_attr=False) - return fluid.layers.batch_norm( - input=conv, - act=act, ) - - -def shortcut(input, ch_out, stride, is_first): - ch_in = input.shape[1] - if ch_in != ch_out or stride != 1 or is_first == True: - return conv_bn_layer(input, ch_out, 1, stride) - else: - return input - - -def bottleneck_block(input, num_filters, stride): - conv0 = conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu') - conv1 = conv_bn_layer( - input=conv0, - num_filters=num_filters, - filter_size=3, - stride=stride, - act='relu') - conv2 = conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None) - - short = shortcut(input, num_filters * 4, stride, is_first=False) - - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - -def basic_block(input, num_filters, stride, is_first): - conv0 = conv_bn_layer( - input=input, - num_filters=num_filters, - filter_size=3, - act='relu', - stride=stride) - conv1 = conv_bn_layer( - input=conv0, num_filters=num_filters, filter_size=3, act=None) - short = shortcut(input, num_filters, stride, is_first) - return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - - -def network(input, layers=50, class_dim=1000): - supported_layers = [18, 34, 50, 101, 152] - assert layers in supported_layers - depth = None - if layers == 18: - depth = [2, 2, 2, 2] - elif layers == 34 or layers == 50: - depth = [3, 4, 6, 3] - elif layers == 101: - depth = [3, 4, 23, 3] - elif layers == 152: - depth = [3, 8, 36, 3] - num_filters = [64, 128, 256, 512] - with fluid.device_guard("gpu:0"): - conv = conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') - conv = fluid.layers.pool2d( - input=conv, - pool_size=3, - pool_stride=2, - pool_padding=1, - pool_type='max') - if layers >= 50: - for block in range(len(depth)): - with fluid.device_guard("gpu:1"): - for i in range(depth[block]): - conv = bottleneck_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1) - - with fluid.device_guard("gpu:2"): - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - else: - for block in range(len(depth)): - with fluid.device_guard("gpu:1"): - for i in range(depth[block]): - conv = basic_block( - input=conv, - num_filters=num_filters[block], - stride=2 if i == 0 and block 
!= 0 else 1, - is_first=block == i == 0) - with fluid.device_guard("gpu:2"): - pool = fluid.layers.pool2d( - input=conv, pool_size=7, pool_type='avg', global_pooling=True) - stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc( - input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - return out - - -def train(): - args = parse_args() - lr = args.lr - - with fluid.device_guard("gpu:0"): - image = fluid.layers.data( - name="image", shape=[3, 224, 224], dtype="float32") - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - data_loader = fluid.io.DataLoader.from_generator( - feed_list=[image, label], - capacity=64, - use_double_buffer=True, - iterable=False) - fc = build_network(image, layers=50) - - with fluid.device_guard("gpu:3"): - out, prob = fluid.layers.softmax_with_cross_entropy( - logits=fc, label=label, return_softmax=True) - loss = fluid.layers.mean(out) - acc_top1 = fluid.layers.accuracy(input=prob, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=prob, label=label, k=5) - - optimizer = fluid.optimizer.SGD(lr) - optimizer = fluid.optimizer.PipelineOptimizer(optimizer, num_microbatches=2) - optimizer.minimize(loss) - - def train_reader(): - for _ in range(4000): - img = np.random.random(size=[3, 224, 224]).astype('float32') - label = np.random.random(size=[1]).astype('int64') - yield img, label - - data_loader.set_sample_generator(train_reader, batch_size=args.batch_size) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - - exe.run(fluid.default_startup_program()) - - data_loader.start() - logger.info("begin training...") - exe.train_from_dataset(fluid.default_main_program(), debug=is_profile) - - -if __name__ == "__main__": - train() diff --git a/python/paddle/fluid/tests/demo/pyreader.py b/python/paddle/fluid/tests/demo/pyreader.py deleted file mode 100644 index 6995346ffa61ea65119930296be2fba5a10c5451..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/demo/pyreader.py +++ /dev/null @@ -1,102 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import print_function - -import numpy -import six - -import paddle -import paddle.dataset.mnist as mnist -import paddle.fluid as fluid - - -def network(is_train): - reader = fluid.layers.py_reader( - capacity=10, - shapes=((-1, 784), (-1, 1)), - dtypes=('float32', 'int64'), - name="train_reader" if is_train else "test_reader", - use_double_buffer=True) - img, label = fluid.layers.read_file(reader) - - hidden = img - - for i in six.moves.xrange(2): - hidden = fluid.layers.fc(input=hidden, size=100, act='tanh') - hidden = fluid.layers.dropout( - hidden, dropout_prob=0.5, is_test=not is_train) - - prediction = fluid.layers.fc(input=hidden, size=10, act='softmax') - loss = fluid.layers.cross_entropy(input=prediction, label=label) - return fluid.layers.mean(loss), reader - - -def main(): - train_prog = fluid.Program() - startup_prog = fluid.Program() - - with fluid.program_guard(train_prog, startup_prog): - with fluid.unique_name.guard(): - loss, train_reader = network(True) - adam = fluid.optimizer.Adam(learning_rate=0.01) - adam.minimize(loss) - - test_prog = fluid.Program() - test_startup = fluid.Program() - with fluid.program_guard(test_prog, test_startup): - with fluid.unique_name.guard(): - test_loss, test_reader = network(False) - - use_cuda = fluid.core.is_compiled_with_cuda() - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - fluid.Executor(place).run(startup_prog) - fluid.Executor(place).run(test_startup) - - trainer = fluid.ParallelExecutor( - use_cuda=use_cuda, loss_name=loss.name, main_program=train_prog) - - tester = fluid.ParallelExecutor( - use_cuda=use_cuda, share_vars_from=trainer, main_program=test_prog) - - train_reader.decorate_paddle_reader( - paddle.reader.shuffle( - paddle.batch(mnist.train(), 512), buf_size=8192)) - - test_reader.decorate_paddle_reader(paddle.batch(mnist.test(), 512)) - - for epoch_id in six.moves.xrange(10): - train_reader.start() - try: - while True: - print( - 'train_loss', - numpy.array(trainer.run(fetch_list=[loss.name]))) - except fluid.core.EOFException: - print('End of epoch', epoch_id) - train_reader.reset() - - test_reader.start() - try: - while True: - print( - 'test loss', - numpy.array(tester.run(fetch_list=[test_loss.name]))) - except fluid.core.EOFException: - print('End of testing') - test_reader.reset() - - -if __name__ == '__main__': - main() diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 6220bf62c79c30737f923e744d5670818f54ff6e..a25cba029dd8bac81d6b00c1d9fb710f421ce9d0 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -432,8 +432,6 @@ if(WITH_DISTRIBUTE) list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_lars") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_mnist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_save_load") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_simnet_bow") - list(REMOVE_ITEM DIST_TEST_OPS "test_dist_fleet_ctr") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_text_classification") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_train") list(REMOVE_ITEM DIST_TEST_OPS "test_dist_word2vec") @@ -587,8 +585,10 @@ endif() # setting timeout value for old unittests # set_tests_properties(test_dist_fleet_sparse_embedding_ctr PROPERTIES TIMEOUT 200) -set_tests_properties(test_fused_elemwise_activation_op PROPERTIES TIMEOUT 150) -set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) -set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 150) 
-set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) -set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) +if(NOT WIN32 AND NOT APPLE) + set_tests_properties(test_fused_elemwise_activation_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_gru_op PROPERTIES TIMEOUT 200) + set_tests_properties(test_layer_norm_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_pool3d_op PROPERTIES TIMEOUT 150) + set_tests_properties(test_regularizer PROPERTIES TIMEOUT 150) +endif() diff --git a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py index 863c001f226f86384e2820cb6877ded48cffa119..15e98481c26b20de4e9fa493fa022380ba1fcd63 100644 --- a/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py +++ b/python/paddle/fluid/tests/unittests/ctr_dataset_reader.py @@ -153,7 +153,7 @@ def gen_fake_line(dnn_data_num=7, return line -def prepare_fake_data(file_nums=8, file_lines=1000): +def prepare_fake_data(file_nums=9, file_lines=1000): """ Create fake data with same type as avazu_ctr_data """ diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 73b546b95cfeb8032c6e99eabe24c883d1f5f66c..dc39472d7aed8f52ee3bb0f85a5e503db9093070 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -196,8 +196,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): fleet.stop_worker() def do_dataset_training(self, fleet): - dnn_input_dim, lr_input_dim, train_file_path = ctr_dataset_reader.prepare_data( - ) + train_file_list = ctr_dataset_reader.prepare_fake_data() exe = fluid.Executor(fluid.CPUPlace()) @@ -206,9 +205,7 @@ class TestDistCTR2x2(FleetDistRunnerBase): thread_num = 2 batch_size = 128 - filelist = [] - for _ in range(thread_num): - filelist.append(train_file_path) + filelist = train_file_list # config dataset dataset = paddle.distributed.fleet.DatasetFactory().create_dataset() diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py index 0de898d6dde217ec6d5cdf53611f986f7b04863f..7a4e7534f07391956cd94577847c8a8f77895818 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_ctr.py @@ -177,7 +177,7 @@ class TestHeterPsCTR2x2(FleetDistHeterRunnerBase): fleet.init_worker() exe.run(fluid.default_startup_program()) - thread_num = 1 + thread_num = int(os.getenv("CPU_NUM", 2)) batch_size = 128 filelist = fleet_util.get_file_shard(train_file_list) print("filelist: {}".format(filelist)) diff --git a/python/paddle/fluid/tests/unittests/dist_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py similarity index 55% rename from python/paddle/fluid/tests/unittests/dist_simnet_bow.py rename to python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index 9fcba2aede1cea3c78108e7daa8eb34a1ab80048..7d5ca4fc6e3916eab29942c85e88664f60cbf032 100644 --- a/python/paddle/fluid/tests/unittests/dist_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -19,6 +19,8 @@ import argparse import time import math import random +import shutil +import tempfile import paddle import paddle.fluid as fluid @@ -29,7 +31,8 @@ from multiprocessing import Process import os import signal from functools import reduce -from test_dist_base import TestDistRunnerBase, runtime_main +from test_dist_fleet_base import 
runtime_main, FleetDistRunnerBase +from paddle.distributed.fleet.base.util_factory import fleet_util DTYPE = "int64" DATA_URL = 'http://paddle-dist-ce-data.bj.bcebos.com/simnet.train.1000' @@ -49,6 +52,18 @@ fluid.default_startup_program().random_seed = 1 fluid.default_main_program().random_seed = 1 +def fake_simnet_reader(): + def reader(): + for _ in range(1000): + q = np.random.random_integers(0, 1500 - 1, size=1).tolist() + label = np.random.random_integers(0, 1, size=1).tolist() + pt = np.random.random_integers(0, 1500 - 1, size=1).tolist() + nt = np.random.random_integers(0, 1500 - 1, size=1).tolist() + yield [q, label, pt, nt] + + return reader + + def get_acc(cos_q_nt, cos_q_pt, batch_size): cond = fluid.layers.less_than(cos_q_nt, cos_q_pt) cond = fluid.layers.cast(cond, dtype='float64') @@ -75,34 +90,40 @@ def get_loss(cos_q_pt, cos_q_nt): return avg_cost -def get_optimizer(op="sgd"): - if op.upper() == "sgd".upper(): - optimizer = fluid.optimizer.SGD(learning_rate=base_lr) - elif op.upper() == "adam".upper(): - optimizer = fluid.optimizer.Adam(learning_rate=base_lr) - else: - optimizer = fluid.optimizer.SGD(learning_rate=base_lr) - return optimizer - - def train_network(batch_size, is_distributed=False, is_sparse=False, - is_self_contained_lr=False): + is_self_contained_lr=False, + is_pyreader=False): # query q = fluid.layers.data( name="query_ids", shape=[1], dtype="int64", lod_level=1) + # label data + label = fluid.layers.data(name="label", shape=[1], dtype="int64") + # pt + pt = fluid.layers.data( + name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + # nt + nt = fluid.layers.data( + name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + + datas = [q, label, pt, nt] + + reader = None + if is_pyreader: + reader = fluid.io.PyReader( + feed_list=datas, + capacity=64, + iterable=False, + use_double_buffer=False) + # embedding q_emb = fluid.embedding( input=q, is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__emb__", - learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__emb__"), + initializer=fluid.initializer.Constant(value=0.01), name="__emb__"), is_sparse=is_sparse) q_emb = fluid.layers.reshape(q_emb, [-1, emb_dim]) # vsum @@ -115,12 +136,8 @@ def train_network(batch_size, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__q_fc__", - learning_rate=base_lr)) - # label data - label = fluid.layers.data(name="label", shape=[1], dtype="int64") - # pt - pt = fluid.layers.data( - name="pos_title_ids", shape=[1], dtype="int64", lod_level=1) + learning_rate=base_lr), ) + # embedding pt_emb = fluid.embedding( input=pt, @@ -129,9 +146,7 @@ def train_network(batch_size, param_attr=fluid.ParamAttr( initializer=fluid.initializer.Constant(value=0.01), name="__emb__", - learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__emb__"), + learning_rate=emb_lr), is_sparse=is_sparse) pt_emb = fluid.layers.reshape(pt_emb, [-1, emb_dim]) # vsum @@ -142,24 +157,16 @@ def train_network(batch_size, input=pt_ss, size=hid_dim, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__fc__", - learning_rate=base_lr), + initializer=fluid.initializer.Constant(value=0.01), name="__fc__"), bias_attr=fluid.ParamAttr(name="__fc_b__")) - # nt - nt = fluid.layers.data( - 
name="neg_title_ids", shape=[1], dtype="int64", lod_level=1) + # embedding nt_emb = fluid.embedding( input=nt, is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__emb__", - learning_rate=emb_lr) if is_self_contained_lr else fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__emb__"), + initializer=fluid.initializer.Constant(value=0.01), name="__emb__"), is_sparse=is_sparse) nt_emb = fluid.layers.reshape(nt_emb, [-1, emb_dim]) # vsum @@ -170,9 +177,7 @@ def train_network(batch_size, input=nt_ss, size=hid_dim, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), - name="__fc__", - learning_rate=base_lr), + initializer=fluid.initializer.Constant(value=0.01), name="__fc__"), bias_attr=fluid.ParamAttr(name="__fc_b__")) cos_q_pt = fluid.layers.cos_sim(q_fc, pt_fc) cos_q_nt = fluid.layers.cos_sim(q_fc, nt_fc) @@ -180,79 +185,67 @@ def train_network(batch_size, avg_cost = get_loss(cos_q_pt, cos_q_nt) # acc acc = get_acc(cos_q_nt, cos_q_pt, batch_size) - return [avg_cost, acc, cos_q_pt] - - -def combination(x, y): - res = [[[xi, yi] for yi in y] for xi in x] - return res[0] - - -def get_one_data(file_list): - for file in file_list: - contents = [] - with open(file, "r") as fin: - for i in fin: - contents.append(i.strip()) - for index, q in enumerate(contents): - try: - one_data = [[int(j) for j in i.split(" ")] - for i in q.split(";")[:-1]] - if one_data[1][0] + one_data[1][1] != len(one_data) - 3: - q = fin.readline() - continue - tmp = combination(one_data[3:3 + one_data[1][0]], - one_data[3 + one_data[1][0]:]) - except Exception as e: - continue - - for each in tmp: - yield [one_data[2], 0, each[0], each[1]] - - -def get_batch_reader(file_list, batch_size): - def batch_reader(): - res = [] - for i in get_one_data(file_list): - if random.random() <= sample_rate: - res.append(i) - if len(res) >= batch_size: - yield res - res = [] - - return batch_reader - - -def get_train_reader(batch_size): - # The training data set. 
- train_file = os.path.join(paddle.dataset.common.DATA_HOME, "simnet", - "train") - train_reader = get_batch_reader([train_file], batch_size) - train_feed = ["query_ids", "pos_title_ids", "neg_title_ids", "label"] - return train_reader, train_feed - - -class TestDistSimnetBow2x2(TestDistRunnerBase): - def get_model(self, batch_size=2): - # Train program - avg_cost, acc, predict = \ - train_network(batch_size, - bool(int(os.environ["IS_DISTRIBUTED"])), - bool(int(os.environ["IS_SPARSE"])), - bool(int(os.environ["IS_SELF_CONTAINED_LR"]))) - - inference_program = fluid.default_main_program().clone() - - # Optimization - opt = os.getenv('OPTIMIZER', 'sgd') - opt = get_optimizer(opt) - opt.minimize(avg_cost) - - # Reader - train_reader, _ = get_train_reader(batch_size) - return inference_program, avg_cost, train_reader, train_reader, acc, predict + return avg_cost, acc, cos_q_pt, reader + + +class TestDistSimnetBow2x2(FleetDistRunnerBase): + """ + For test SimnetBow model, use Fleet api + """ + + def net(self, args, batch_size=4, lr=0.01): + avg_cost, _, predict, self.reader = \ + train_network(batch_size=batch_size, is_distributed=False, + is_sparse=True, is_self_contained_lr=False, is_pyreader=(args.reader == "pyreader")) + self.avg_cost = avg_cost + self.predict = predict + + return avg_cost + + def check_model_right(self, dirname): + model_filename = os.path.join(dirname, "__model__") + + with open(model_filename, "rb") as f: + program_desc_str = f.read() + + program = fluid.Program.parse_from_string(program_desc_str) + with open(os.path.join(dirname, "__model__.proto"), "w") as wn: + wn.write(str(program)) + + def do_pyreader_training(self, fleet): + """ + do training using dataset, using fetch handler to catch variable + Args: + fleet(Fleet api): the fleet object of Parameter Server, define distribute training role + """ + + exe = fluid.Executor(fluid.CPUPlace()) + fleet.init_worker() + exe.run(fluid.default_startup_program()) + batch_size = 4 + # reader + train_reader = paddle.batch(fake_simnet_reader(), batch_size=batch_size) + self.reader.decorate_sample_list_generator(train_reader) + for epoch_id in range(1): + self.reader.start() + try: + pass_start = time.time() + while True: + loss_val = exe.run(program=fluid.default_main_program(), + fetch_list=[self.avg_cost.name]) + loss_val = np.mean(loss_val) + message = "TRAIN ---> pass: {} loss: {}\n".format(epoch_id, + loss_val) + fleet_util.print_on_rank(message, 0) + + pass_time = time.time() - pass_start + except fluid.core.EOFException: + self.reader.reset() + fleet.stop_worker() + + def do_dataset_training(self, fleet): + pass if __name__ == "__main__": - paddle.dataset.common.download(DATA_URL, 'simnet', DATA_MD5, "train") runtime_main(TestDistSimnetBow2x2) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py index 75bff108dd43665df0fc1c8b166a935946b4fbc7..ba0adaf32e15db71162aed71c042100a0cd50e26 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/predictor_utils.py @@ -81,7 +81,7 @@ class PredictorTools(object): tensor.set_lod(feed_data.lod()) # ensure no diff in multiple repeat times - repeat_time = 10 + repeat_time = 2 for i in range(repeat_time): predictor.zero_copy_run() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 3e6fe168b8eaf39286c518c8b4a2ad6d48b0e6bb..29b4f1b05f9c2911b849b323674b3a704a1da297 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -19,9 +19,11 @@ import unittest import inspect import gast +import paddle import paddle.fluid as fluid import paddle.fluid.dygraph as dygraph +from paddle import to_tensor from paddle.fluid.dygraph import to_variable from paddle.fluid.dygraph.jit import dygraph_to_static_func from paddle.fluid.dygraph.dygraph_to_static.utils import is_dygraph_api @@ -45,11 +47,19 @@ def dyfunc_to_variable_3(x): return res +def dyfunc_to_tensor(x): + res1 = paddle.to_tensor(x, dtype=None, place=None, stop_gradient=True) + res2 = paddle.tensor.to_tensor(data=res1) + res3 = to_tensor(data=res2) + return res3 + + class TestDygraphBasicApi_ToVariable(unittest.TestCase): def setUp(self): self.input = np.ones(5).astype("int32") self.test_funcs = [ - dyfunc_to_variable, dyfunc_to_variable_2, dyfunc_to_variable_3 + dyfunc_to_tensor, dyfunc_to_variable, dyfunc_to_variable_2, + dyfunc_to_variable_3 ] self.place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda( ) else fluid.CPUPlace() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py index 8e35dd78457bb59bb4882bc1deeb23539f47012a..b72149a29c73ff9e1fa1975c3caffebb6202e0b7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cache_program.py @@ -123,7 +123,7 @@ class TestConvertWithCache(unittest.TestCase): @declarative -def sum_even_util_limit(max_len, limit): +def sum_even_until_limit(max_len, limit): ret_sum = fluid.dygraph.to_variable(np.zeros((1)).astype('int32')) for i in range(max_len): if i % 2 > 0: @@ -147,7 +147,7 @@ def sum_under_while(limit): class TestToOutputWithCache(unittest.TestCase): def test_output(self): with fluid.dygraph.guard(): - ret = sum_even_util_limit(80, 10) + ret = sum_even_until_limit(80, 10) self.assertEqual(ret.numpy(), 30) ret = declarative(sum_under_while)(100) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py index e5a33e59a3b97cc06c49247f8b7ab97f92240d26..949286f63efb3357325f25b02f60e938eebd28e8 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_declarative.py @@ -13,13 +13,15 @@ # limitations under the License. 
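# --- annotation (hedged sketch, not part of the patch): the three spellings exercised by
# dyfunc_to_tensor above resolve to the same API and can be chained eagerly:
import numpy as np
import paddle
from paddle import to_tensor

paddle.disable_static()
a = paddle.to_tensor(np.ones(5).astype("int32"), dtype=None, place=None, stop_gradient=True)
b = paddle.tensor.to_tensor(data=a)
c = to_tensor(data=b)
print(c.numpy())  # [1 1 1 1 1]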
import numpy as np +import unittest + import paddle -from paddle.static import InputSpec import paddle.fluid as fluid +from paddle.static import InputSpec from paddle.fluid.dygraph import to_variable, declarative, ProgramTranslator, Layer, jit from paddle.fluid.dygraph.dygraph_to_static.program_translator import ConcreteProgram -import unittest +from test_basic_api_transformation import dyfunc_to_variable program_trans = ProgramTranslator() @@ -181,6 +183,9 @@ def foo_func(a, b, c=1, d=2): class TestDifferentInputSpecCacheProgram(unittest.TestCase): + def setUp(self): + program_trans.enable(True) + def test_with_different_input(self): with fluid.dygraph.guard(fluid.CPUPlace()): x_data = np.ones([16, 10]).astype('float32') @@ -272,5 +277,23 @@ class TestDifferentInputSpecCacheProgram(unittest.TestCase): foo_3.concrete_program +class TestDeclarativeAPI(unittest.TestCase): + def test_error(self): + func = declarative(dyfunc_to_variable) + + paddle.enable_static() + + # Failed to run the callable object decorated by '@paddle.jit.to_static' + # if it does NOT in dynamic mode. + with self.assertRaises(RuntimeError): + func(np.ones(5).astype("int32")) + + program_trans.enable(False) + with self.assertRaises(AssertionError): + # AssertionError: We Only support to_variable in imperative mode, + # please use fluid.dygraph.guard() as context to run it in imperative Mode + func(np.ones(5).astype("int32")) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py index 214cd95d3bc620b3bcadb88e57c7e54a593eaaf4..510b615654751500c33dc3311353ba7e2f8baf40 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_logging_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
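A hedged standalone sketch of the failure mode TestDeclarativeAPI above asserts: calling a function decorated with @paddle.jit.to_static while static mode is enabled is expected to raise RuntimeError. The double() function below is a hypothetical stand-in for the test's dyfunc_to_variable:

import numpy as np
import paddle

@paddle.jit.to_static
def double(x):                 # hypothetical example function
    x = paddle.to_tensor(x)
    return x * 2

paddle.disable_static()
print(double(np.ones(3).astype("int32")).numpy())   # runs under dygraph

paddle.enable_static()
try:
    double(np.ones(3).astype("int32"))   # the patch's test expects RuntimeError here
except RuntimeError as e:
    print("raised as expected:", e)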
@@ -86,11 +86,11 @@ class TestLoggingUtils(unittest.TestCase): with mock.patch.object(sys, 'stdout', stream): logging_utils.warn(warn_msg) logging_utils.error(error_msg) - self.translator_logger.verbosity_level = 2 + self.translator_logger.verbosity_level = 1 logging_utils.log(1, log_msg_1) logging_utils.log(2, log_msg_2) - result_msg = '\n'.join([warn_msg, error_msg, log_msg_2, ""]) + result_msg = '\n'.join([warn_msg, error_msg, log_msg_1, ""]) self.assertEqual(result_msg, stream.getvalue()) def test_log_transformed_code(self): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py index 1ef3bd1bf150056816283c83fa3ff6af1e589732..bd600d2f2dbd6341ff7a83d6636047d01cae7859 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mnist.py @@ -154,6 +154,18 @@ class TestMNISTWithToStatic(TestMNIST): msg='dygraph is {}\n static_res is \n{}'.format(dygraph_loss, static_loss)) + def test_mnist_declarative_cpu_vs_mkldnn(self): + dygraph_loss_cpu = self.train_dygraph() + fluid.set_flags({'FLAGS_use_mkldnn': True}) + try: + dygraph_loss_mkldnn = self.train_dygraph() + finally: + fluid.set_flags({'FLAGS_use_mkldnn': False}) + self.assertTrue( + np.allclose(dygraph_loss_cpu, dygraph_loss_mkldnn), + msg='cpu dygraph is {}\n mkldnn dygraph is \n{}'.format( + dygraph_loss_cpu, dygraph_loss_mkldnn)) + def train(self, to_static=False): prog_trans = ProgramTranslator() prog_trans.enable(to_static) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 6556b2f03bd5304e290792d07d1d969ab255bfdc..203c8ddb3488c0fef9a0a590378505e5b61233cf 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -346,6 +346,13 @@ class TestResnet(unittest.TestCase): dygraph_loss)) self.verify_predict() + def test_in_static_mode_mkldnn(self): + fluid.set_flags({'FLAGS_use_mkldnn': True}) + try: + train(to_static=True) + finally: + fluid.set_flags({'FLAGS_use_mkldnn': False}) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/launch_function_helper.py b/python/paddle/fluid/tests/unittests/launch_function_helper.py index 13041827ffeabd3d6b79e4f34a67bd09624e54f6..046268444018799ca4d7f5530cbb6b1c707e062f 100644 --- a/python/paddle/fluid/tests/unittests/launch_function_helper.py +++ b/python/paddle/fluid/tests/unittests/launch_function_helper.py @@ -15,7 +15,8 @@ from multiprocessing import Pool, Process import os import socket from contextlib import closing -import psutil +import time +import sys def launch_func(func, env_dict): @@ -25,19 +26,36 @@ def launch_func(func, env_dict): return proc -def wait(procs, timeout=None): - # wait - decents = [] +def wait(procs, timeout=30): + error = False + begin = time.time() + while True: + alive = False + for p in procs: + p.join(timeout=10) + if p.exitcode is None: + alive = True + continue + elif p.exitcode != 0: + error = True + break + + if not alive: + break + + if error: + break + + if timeout is not None and time.time() - begin >= timeout: + error = True + break + for p in procs: - for child in psutil.Process(p.pid).children(recursive=True): - decents.append(child) - - gone, alive = psutil.wait_procs(decents, timeout=timeout) - for p in alive: - p.kill() - for p 
in gone: - if p.returncode != 0: - sys.exit(1) + if p.is_alive(): + p.terminate() + + if error: + sys.exit(1) def _find_free_port(port_set): diff --git a/python/paddle/fluid/tests/unittests/mkldnn/check_flags_use_mkldnn.py b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_use_mkldnn.py new file mode 100644 index 0000000000000000000000000000000000000000..8f5715a0d0afcf59ebbe1cc95a6b06dead64c6e2 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/check_flags_use_mkldnn.py @@ -0,0 +1,48 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import unicode_literals +from __future__ import print_function + +import numpy as np +import paddle.fluid as fluid +import os +from paddle.fluid.layer_helper import LayerHelper + + +def check(): + print("check: fluid.core.globals()['FLAGS_use_mkldnn']=", + fluid.core.globals()["FLAGS_use_mkldnn"]) + print("check: fluid.get_flags('FLAGS_use_mkldnn')=", + fluid.get_flags(['FLAGS_use_mkldnn'])) + print("check: DNNL_VERBOSE=", os.environ['DNNL_VERBOSE']) + a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32) + helper = LayerHelper(fluid.unique_name.generate(str("test")), act="relu") + func = helper.append_activation + with fluid.dygraph.guard(fluid.core.CPUPlace()): + a = fluid.dygraph.to_variable(a_np) + res1 = func(a) + res2 = np.maximum(a_np, 0) + assert (np.array_equal(res1.numpy(), res2)) + + +if __name__ == '__main__': + try: + check() + for k, v in sorted(os.environ.items()): + print(k + ':', v) + print('\n') + except Exception as e: + print(e) + print(type(e)) diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py new file mode 100644 index 0000000000000000000000000000000000000000..69676d0d70bdd523652c30c4cf066dc6982c46d4 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_flags_use_mkldnn.py @@ -0,0 +1,58 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
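The new check script above reads FLAGS_use_mkldnn through two equivalent routes; a minimal sketch of that flag round-trip, assuming a build with oneDNN support:

import paddle.fluid as fluid

# Flip the global flag on, read it back both ways, then restore it.
fluid.set_flags({'FLAGS_use_mkldnn': True})
print(fluid.core.globals()["FLAGS_use_mkldnn"])   # direct global lookup
print(fluid.get_flags(['FLAGS_use_mkldnn']))      # e.g. {'FLAGS_use_mkldnn': True}
fluid.set_flags({'FLAGS_use_mkldnn': False})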
+ +from __future__ import unicode_literals +from __future__ import print_function + +import unittest +import os +import sys +import subprocess + + +class TestFlagsUseMkldnn(unittest.TestCase): + def setUp(self): + self._python_interp = sys.executable + self._python_interp += " check_flags_use_mkldnn.py" + + self.env = os.environ.copy() + self.env[str("GLOG_v")] = str("3") + self.env[str("DNNL_VERBOSE")] = str("1") + self.env[str("FLAGS_use_mkldnn")] = str("1") + + def test_flags_use_mkl_dnn(self): + cmd = self._python_interp + + proc = subprocess.Popen( + cmd.split(" "), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + env=self.env) + + out, err = proc.communicate() + returncode = proc.returncode + + print('out', out) + print('err', err) + + assert returncode == 0 + # in python3, type(out) is 'bytes', need use encode + assert out.find( + "dnnl_verbose,exec,cpu,eltwise,jit:avx512_common,forward_training," + "data_f32::blocked:abc:f0 diff_undef::undef::f0,,alg:eltwise_relu". + encode()) != -1 + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py index aff13f0b555299d1c7b453b61be79f5a356a5416..b083e76897cd96cea93d7b90898541de1226ac15 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_mnist.py @@ -114,8 +114,8 @@ class TestMnist(TestParallelDyGraphRunnerBase): model = MNIST() train_reader = paddle.batch( paddle.dataset.mnist.train(), batch_size=2, drop_last=True) - opt = fluid.optimizer.Adam( - learning_rate=1e-3, parameter_list=model.parameters()) + opt = paddle.optimizer.Adam( + learning_rate=1e-3, parameters=model.parameters()) return model, train_reader, opt def run_one_loop(self, model, opt, data): diff --git a/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py b/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..41eadc13a2ad26ac15b0623147dae5771f371a12 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/simnet_dataset_reader.py @@ -0,0 +1,33 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
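parallel_dygraph_mnist.py above moves from fluid.optimizer.Adam(parameter_list=...) to the 2.0-style optimizer; a short sketch of the new calling convention, using a plain Linear layer as a placeholder model:

import paddle

paddle.disable_static()
model = paddle.nn.Linear(10, 10)   # placeholder model, not the MNIST net
opt = paddle.optimizer.Adam(learning_rate=1e-3,
                            parameters=model.parameters())

loss = paddle.mean(model(paddle.randn([4, 10])))
loss.backward()
opt.step()        # apply the update
opt.clear_grad()  # reset gradients before the next batch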
+ +from __future__ import print_function + +import os +import logging +import tarfile + +import random + +import paddle +import paddle.fluid.incubate.data_generator as data_generator + +logging.basicConfig() +logger = logging.getLogger("paddle") +logger.setLevel(logging.INFO) + + +class DatasetSimnetReader(data_generator.MultiSlotDataGenerator): + def generate_sample(self, line): + pass diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index d4aafcd27a5aceb3c0b5fa9ddf8343d404bddbf5..14e83fccd655527d8f3012365e4757d23236a445 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -450,7 +450,7 @@ class TestAdamOpV2(unittest.TestCase): import paddle paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) state_dict = adam.state_dict() @@ -504,6 +504,19 @@ class TestAdamOpV2(unittest.TestCase): shape=[1], value=lr, dtype='float32') adam.set_lr(lr_var) + def test_adam_op_invalid_input(self): + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adam( + 0.1, beta1=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adam( + 0.1, beta2=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adam( + 0.1, epsilon=-1, parameters=linear.parameters()) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adamax_op.py b/python/paddle/fluid/tests/unittests/test_adamax_op.py index a6d1be7616c73019cd8f66dcf0c108cd58ec600b..8ce7656acfae77987b284e29cd85b35d264b20e2 100644 --- a/python/paddle/fluid/tests/unittests/test_adamax_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamax_op.py @@ -184,5 +184,21 @@ def adamax_step(inputs, attributes): return param_out, moment_out, inf_norm_out +class TestAdamaxOpV2(unittest.TestCase): + def test_adamax_op_invalid_input(self): + import paddle + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adamax( + 0.1, beta1=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adamax( + 0.1, beta2=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.Adamax( + 0.1, epsilon=-1, parameters=linear.parameters()) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 0a7cf54e2e0f15e51ba1b6f7526837f53c7cc2e0..cce24b57d2ca50e96e3ae0cf6d8912a8aea79a31 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -76,6 +76,19 @@ class TestAdamWOp(unittest.TestCase): rets = exe.run(train_prog, feed={"data": data_np}, fetch_list=[loss]) assert rets[0] is not None + def test_adamw_op_invalid_input(self): + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + with self.assertRaises(ValueError): + adam = paddle.optimizer.AdamW( + 0.1, beta1=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.AdamW( + 0.1, beta2=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.AdamW( + 0.1, epsilon=-1, 
parameters=linear.parameters()) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py index 55612d71a17a7ae9801535bf5a35c83b100aab30..d3e990ca13eb2911ea04ed546b91f58e2db4e440 100644 --- a/python/paddle/fluid/tests/unittests/test_affine_grid_op.py +++ b/python/paddle/fluid/tests/unittests/test_affine_grid_op.py @@ -49,7 +49,6 @@ class TestAffineGridOp(OpTest): self.initTestCase() self.op_type = "affine_grid" theta = np.random.randint(1, 3, self.theta_shape).astype("float32") - theta = np.ones(self.theta_shape).astype("float32") self.inputs = {'Theta': theta} self.attrs = { "use_cudnn": self.use_cudnn, diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py index 7c1f9d802c31ac2c3b244541936ba25018e1487a..1b1b1d7c983282974d2fa46038c35c98de4f9ec2 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -218,7 +218,7 @@ def create_test_case(op_type): self.assertTrue("test_arg_api" in result.name) def run_dygraph(self, place): - paddle.disable_static() + paddle.disable_static(place) op = eval("paddle.%s" % (op_type)) data_tensor = paddle.to_tensor(self.input_data) @@ -240,7 +240,7 @@ def create_test_case(op_type): #case 4 result_data = op(data_tensor, axis=-1, keepdim=True) excepted_data = self.numpy_op(self.input_data, axis=-1) - excepted_data = excepted_data.reshape((10)) + excepted_data = excepted_data.reshape((10, 1)) self.assertTrue((result_data.numpy() == excepted_data).all(), True) #case 5 @@ -299,14 +299,42 @@ class TestArgMinMaxOpError(unittest.TestCase): name="test_argmax", shape=[10], dtype="float32") output = paddle.argmax(x=data, dtype="float32") - self.assertRaises(ValueError, test_argmax_attr_type) + self.assertRaises(TypeError, test_argmax_attr_type) def test_argmin_attr_type(): data = paddle.static.data( name="test_argmax", shape=[10], dtype="float32") output = paddle.argmin(x=data, dtype="float32") - self.assertRaises(ValueError, test_argmin_attr_type) + self.assertRaises(TypeError, test_argmin_attr_type) + + def test_argmax_axis_type(): + data = paddle.static.data( + name="test_argmax", shape=[10], dtype="float32") + output = paddle.argmax(x=data, axis=1.2) + + self.assertRaises(TypeError, test_argmax_axis_type) + + def test_argmin_axis_type(): + data = paddle.static.data( + name="test_argmin", shape=[10], dtype="float32") + output = paddle.argmin(x=data, axis=1.2) + + self.assertRaises(TypeError, test_argmin_axis_type) + + def test_argmax_dtype_type(): + data = paddle.static.data( + name="test_argmax", shape=[10], dtype="float32") + output = paddle.argmax(x=data, dtype=1) + + self.assertRaises(TypeError, test_argmax_dtype_type) + + def test_argmin_dtype_type(): + data = paddle.static.data( + name="test_argmin", shape=[10], dtype="float32") + output = paddle.argmin(x=data, dtype=1) + + self.assertRaises(TypeError, test_argmin_dtype_type) if __name__ == '__main__': diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index 5c705378e515eec4c950f6996e2789df603fcda3..2af0b31d6fc26c59803f29dcdc54979491767dd2 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -43,6 +43,21 @@ class TestBatchNorm(unittest.TestCase): 
x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') x_data_3 = np.random.random(size=(2, 1, 3)).astype('float32') + def error1d_dataformat(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + batch_norm1d = paddle.nn.BatchNorm1d(1, data_format='NCDHW') + batch_norm1d(fluid.dygraph.to_variable(x_data_4)) + + def error2d_dataformat(): + x_data_3 = np.random.random(size=(2, 1, 3)).astype('float32') + batch_norm2d = paddle.nn.BatchNorm2d(1, data_format='NCDHW') + batch_norm2d(fluid.dygraph.to_variable(x_data_3)) + + def error3d_dataformat(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + batch_norm3d = paddle.nn.BatchNorm3d(1, data_format='NCL') + batch_norm3d(fluid.dygraph.to_variable(x_data_4)) + def error1d(): x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') batch_norm1d = paddle.nn.BatchNorm1d(1) @@ -62,6 +77,9 @@ class TestBatchNorm(unittest.TestCase): self.assertRaises(ValueError, error1d) self.assertRaises(ValueError, error2d) self.assertRaises(ValueError, error3d) + self.assertRaises(ValueError, error1d_dataformat) + self.assertRaises(ValueError, error2d_dataformat) + self.assertRaises(ValueError, error3d_dataformat) def test_dygraph(self): places = [fluid.CPUPlace()] diff --git a/python/paddle/fluid/tests/unittests/test_clip_op.py b/python/paddle/fluid/tests/unittests/test_clip_op.py index 2e1f9d41747e3a99b4b4a0650a52973459b85c7b..b56d9f6668e8bcbd37443fb88b1f5f4dd40a2511 100644 --- a/python/paddle/fluid/tests/unittests/test_clip_op.py +++ b/python/paddle/fluid/tests/unittests/test_clip_op.py @@ -138,8 +138,9 @@ class TestClipAPI(unittest.TestCase): out_6 = paddle.clip(images, max=max) out_7 = paddle.clip(images, max=-1.) out_8 = paddle.clip(images) + out_9 = paddle.clip(paddle.cast(images, 'float64'), min=0.2, max=0.9) - res1, res2, res3, res4, res5, res6, res7, res8 = exe.run( + res1, res2, res3, res4, res5, res6, res7, res8, res9 = exe.run( fluid.default_main_program(), feed={ "image": data, @@ -147,7 +148,7 @@ class TestClipAPI(unittest.TestCase): "max": np.array([0.8]).astype('float32') }, fetch_list=[ - out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8 + out_1, out_2, out_3, out_4, out_5, out_6, out_7, out_8, out_9 ]) self.assertTrue(np.allclose(res1, data.clip(0.2, 0.8))) @@ -158,6 +159,8 @@ class TestClipAPI(unittest.TestCase): self.assertTrue(np.allclose(res6, data.clip(max=0.8))) self.assertTrue(np.allclose(res7, data.clip(max=-1))) self.assertTrue(np.allclose(res8, data)) + self.assertTrue( + np.allclose(res9, data.astype(np.float64).clip(0.2, 0.9))) def test_clip_dygraph(self): place = fluid.CUDAPlace(0) if fluid.core.is_compiled_with_cuda( diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py index da527b26bf0608da5a648d92b492ff27cf2802f0..35fce9e9d6ba9d7a2f264bdd5c1f3deb7a2a67e9 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_layer.py @@ -44,7 +44,7 @@ class Conv1dTestCase(unittest.TestCase): self.spartial_shape = spartial_shape self.filter_size = filter_size self.data_format = data_format - self.channel_last = (self.data_format == "NHWC") + self.channel_last = (self.data_format == "NLC") self.padding = padding self.padding_mode = padding_mode @@ -147,6 +147,14 @@ class Conv1dErrorTestCase(Conv1dTestCase): self.paddle_nn_layer() +class Conv1dTypeErrorTestCase(Conv1dTestCase): + def runTest(self): + place = fluid.CPUPlace() + with dg.guard(place): + 
with self.assertRaises(TypeError): + self.paddle_nn_layer() + + def add_cases(suite): suite.addTest(Conv1dTestCase(methodName='runTest')) suite.addTest(Conv1dTestCase(methodName='runTest', stride=[1], dilation=2)) @@ -161,6 +169,7 @@ def add_cases(suite): Conv1dTestCase( methodName='runTest', padding=2, data_format='NLC')) suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1])) + suite.addTest(Conv1dTestCase(methodName='runTest', padding=[1, 2])) suite.addTest(Conv1dTestCase(methodName='runTest', padding=2)) suite.addTest(Conv1dTestCase(methodName='runTest')) suite.addTest( @@ -178,7 +187,7 @@ def add_cases(suite): def add_error_cases(suite): suite.addTest( - Conv1dErrorTestCase( + Conv1dTypeErrorTestCase( methodName='runTest', padding_mode="reflect", padding="valid")) suite.addTest( Conv1dErrorTestCase( diff --git a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py index 73227dd3610376d85fcfc70bb2653dfd927427fd..4c98aacd209dab8e5dc9e7744922a927700c4bb3 100644 --- a/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv1d_transpose_layer.py @@ -201,6 +201,7 @@ def add_cases(suite): ConvTranspose1dTestCase( methodName='runTest', data_format="NLC", stride=3, output_padding=2)) + suite.addTest(ConvTranspose1dTestCase(methodName='runTest', padding=[1, 2])) def add_error_cases(suite): diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py new file mode 100644 index 0000000000000000000000000000000000000000..0c2520038a82a0b9427b2cbe1d4010a1bc8e040c --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -0,0 +1,163 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Test cloud role maker.""" + +from __future__ import print_function +import os +import unittest +import paddle.fluid.generator as generator + +import time # temp for debug +import paddle.fluid as fluid +import numpy as np +import paddle +import paddle.fluid.core as core + + +class TestGeneratorSeed(unittest.TestCase): + """ + Test cases for cpu generator seed. 
+ """ + + def test_gen_dropout_dygraph(self): + gen = paddle.manual_seed(12343) + + fluid.enable_dygraph() + + gen.manual_seed(111111111) + st = paddle.get_cuda_rng_state() + + x = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + x_again = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + x_third = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + print("x: {}".format(x.numpy())) + print("x_again: {}".format(x_again.numpy())) + x = x + x_again + x_third + y = fluid.layers.dropout(x, 0.5) + + paddle.set_cuda_rng_state(st) + + x1 = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + x1_again = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + x1_third = fluid.layers.uniform_random( + [2, 10], dtype="float32", min=0.0, max=1.0) + x1 = x1 + x1_again + x1_third + y1 = fluid.layers.dropout(x1, 0.5) + y_np = y.numpy() + y1_np = y1.numpy() + + if core.is_compiled_with_cuda(): + print(">>>>>>> dropout dygraph >>>>>>>") + self.assertTrue(np.allclose(y_np, y1_np)) + + def test_generator_gaussian_random_dygraph(self): + """Test Generator seed.""" + fluid.enable_dygraph() + + paddle.manual_seed(12312321111) + x = fluid.layers.gaussian_random([120], dtype="float32") + st1 = paddle.get_cuda_rng_state() + x1 = fluid.layers.gaussian_random([120], dtype="float32") + paddle.set_cuda_rng_state(st1) + x2 = fluid.layers.gaussian_random([120], dtype="float32") + paddle.manual_seed(12312321111) + x3 = fluid.layers.gaussian_random([120], dtype="float32") + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + if core.is_compiled_with_cuda(): + print(">>>>>>> gaussian random dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_generator_randint_dygraph(self): + """Test Generator seed.""" + + fluid.enable_dygraph() + + gen = paddle.manual_seed(12312321111) + x = paddle.randint(low=10, shape=[10], dtype="int32") + st1 = gen.get_state() + x1 = paddle.randint(low=10, shape=[10], dtype="int32") + gen.set_state(st1) + x2 = paddle.randint(low=10, shape=[10], dtype="int32") + paddle.manual_seed(12312321111) + x3 = paddle.randint(low=10, shape=[10], dtype="int32") + x_np = x.numpy() + x1_np = x1.numpy() + x2_np = x2.numpy() + x3_np = x3.numpy() + + if core.is_compiled_with_cuda(): + print(">>>>>>> randint dygraph >>>>>>>") + self.assertTrue(np.allclose(x1_np, x2_np)) + self.assertTrue(np.allclose(x_np, x3_np)) + + def test_gen_TruncatedNormal_initializer(self): + fluid.disable_dygraph() + + gen = paddle.manual_seed(123123143) + cur_state = paddle.get_cuda_rng_state() + + startup_program = fluid.Program() + train_program = fluid.Program() + with fluid.program_guard(train_program, startup_program): + # example 1: + # attr shape is a list which doesn't contain tensor Variable. 
+ x = fluid.layers.uniform_random(shape=[2, 10]) + result_1 = fluid.layers.fc( + input=x, + size=10, + param_attr=fluid.initializer.TruncatedNormal( + loc=0.0, scale=2.0)) + result_2 = fluid.layers.fc( + input=x, + size=10, + param_attr=fluid.initializer.TruncatedNormal( + loc=0.0, scale=2.0)) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + out1 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + paddle.manual_seed(123123143) + with fluid.program_guard(train_program, startup_program): + exe.run(startup_program) + out2 = exe.run(train_program, + feed={}, + fetch_list=[result_1, result_2]) + + out1_res1 = np.array(out1[0]) + out1_res2 = np.array(out1[1]) + out2_res1 = np.array(out2[0]) + out2_res2 = np.array(out2[1]) + + if core.is_compiled_with_cuda(): + print(">>>>>>> truncated normal static >>>>>>>") + self.assertTrue(np.allclose(out1_res1, out2_res1)) + self.assertTrue(np.allclose(out1_res2, out2_res2)) + self.assertTrue(not np.allclose(out1_res2, out1_res1)) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py new file mode 100755 index 0000000000000000000000000000000000000000..2a80e20d692c88497e7edccd6eca5509e3522871 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_deprecated_decorator.py @@ -0,0 +1,152 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function +import paddle +import numpy as np +import paddle +import paddle.fluid as fluid +from paddle.static import Program, program_guard +import unittest +import paddle.fluid.core as core +import sys + +LOWEST_WARNING_POSTION = 3 +ERROR_WARNING_POSTION = sys.maxsize + +# custom paddle version +paddle.version.major = '1' +paddle.version.minor = '8' +paddle.version.patch = '0' +paddle.version.rc = '0' +paddle.__version__ = '1.8.0' +paddle.version.full_version = '1.8.0' +print("current paddle version: ", paddle.__version__) + +paddle.disable_static() + + +def get_warning_index(api): + """ + Given an paddle API, return the index of the Warinng information in its doc string if exists; + If Warinng information doesn't exist, return the default ERROR_WARNING_POSTION, sys.maxsize. + + Args: + API (python object) + + Returns: + index (int): the index of the Warinng information in its doc string if exists. + """ + + doc_lst = api.__doc__.splitlines() + for idx, val in enumerate(doc_lst): + if val.startswith("Warning: ") and val.endswith( + " instead." + ) and "and will be removed in future versions." in val: + return idx + return ERROR_WARNING_POSTION + + +class TestDeprecatedDocorator(unittest.TestCase): + """ + tests for paddle's Deprecated Docorator. + test_fluid_data: test for old fluid.data API. + test_fluid_elementwise_mul: test for old fluid.layers.elementwise_xxx APIs. 
+ test_new_multiply: test for new api, which should not insert warning information. + test_ops_elementwise_mul: test for C++ elementwise_mul op, which should not insert warning information. + """ + + def test_fluid_data(self): + """ + test old fluid elementwise_mul api, it should fire Warinng function, + which insert the Warinng info on top of API's doc string. + """ + # Initialization + x = fluid.data(name='x', shape=[3, 2, 1], dtype='float32') + + # expected + expected = LOWEST_WARNING_POSTION + + # captured + captured = get_warning_index(fluid.data) + + # testting + self.assertGreater(expected, captured) + + def test_fluid_elementwise_mul(self): + """ + test old fluid elementwise_mul api, it should trigger Warinng function, + which insert the Warinng info on top of API's doc string. + """ + + # Initialization + a = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + b = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + x = paddle.to_tensor(a) + y = paddle.to_tensor(b) + res = fluid.layers.elementwise_mul(x, y) + + # expected + expected = LOWEST_WARNING_POSTION + + # captured + captured = get_warning_index(fluid.layers.elementwise_mul) + + # testting + self.assertGreater(expected, captured) + + def test_new_multiply(self): + """ + Test for new multiply api, expected result should be False. + """ + + a = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + b = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + x = paddle.to_tensor(a) + y = paddle.to_tensor(b) + res = paddle.multiply(x, y) + + # expected + expected = LOWEST_WARNING_POSTION + + # captured + captured = get_warning_index(paddle.multiply) + + # testting + self.assertLess(expected, captured) + + def test_ops_elementwise_mul(self): + """ + Test for new C++ elementwise_op, expected result should be True, + because not matter what fluid.layers.elementwise_mul is deprecated. 
+ """ + + a = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + b = np.random.uniform(0.1, 1, [51, 76]).astype(np.float32) + x = paddle.to_tensor(a) + y = paddle.to_tensor(b) + res = core.ops.elementwise_mul(x, y) + + # expected + expected = LOWEST_WARNING_POSTION + + # captured + captured = get_warning_index(fluid.layers.elementwise_mul) + + # testting + self.assertGreater(expected, captured) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_directory_migration.py b/python/paddle/fluid/tests/unittests/test_directory_migration.py index 2919ec5e9ca97b1d59af46a54b2d702cb6de4a14..529fff158c55fc30248b9f5a88c8c615a8b55c79 100644 --- a/python/paddle/fluid/tests/unittests/test_directory_migration.py +++ b/python/paddle/fluid/tests/unittests/test_directory_migration.py @@ -43,7 +43,7 @@ class TestDirectory(unittest.TestCase): 'paddle.distributed.prepare_context', 'paddle.DataParallel', 'paddle.jit', 'paddle.jit.TracedLayer', 'paddle.jit.to_static', 'paddle.jit.ProgramTranslator', 'paddle.jit.TranslatedLayer', - 'paddle.jit.save', 'paddle.jit.load', 'paddle.jit.SaveLoadConfig', + 'paddle.jit.save', 'paddle.jit.load', 'paddle.SaveLoadConfig', 'paddle.NoamDecay', 'paddle.PiecewiseDecay', 'paddle.NaturalExpDecay', 'paddle.ExponentialDecay', 'paddle.InverseTimeDecay', 'paddle.PolynomialDecay', diff --git a/python/paddle/fluid/tests/unittests/test_dist_base.py b/python/paddle/fluid/tests/unittests/test_dist_base.py index faff81fa84fb5fa66c9ff14f782d2301e3964672..f4d368b6b6f52f3071320eaffbeedc8d14d63d2e 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_base.py +++ b/python/paddle/fluid/tests/unittests/test_dist_base.py @@ -488,6 +488,50 @@ class TestParallelDyGraphRunnerBase(object): model.clear_gradients() return out_losses + def run_gpu_fleet_api_trainer(self, args): + import paddle.distributed.fleet as fleet + import paddle.distributed.fleet.base.role_maker as role_maker + # 1. enable dygraph + paddle.disable_static() + + # 2. init seed + seed = 90 + paddle.static.default_startup_program().random_seed = seed + paddle.static.default_main_program().random_seed = seed + np.random.seed(seed) + random.seed = seed + # get trainer id + args.trainer_id = paddle.distributed.get_rank() + + # 3. init parallel env + if args.update_method == "nccl2": + fleet.init(is_collective=True) + + # 4. 
train model + model, train_reader, opt = self.get_model() + if args.update_method == "nccl2": + opt = fleet.distributed_optimizer(opt) + model = fleet.distributed_model(model) + + out_losses = [] + for step_id, data in enumerate(train_reader()): + data = self._get_data(data, args) + if step_id == RUN_STEP: + break + loss = self.run_one_loop(model, opt, data) + out_losses.append(loss.numpy()) + + if args.update_method == "nccl2": + loss = model.scale_loss(loss) + + loss.backward() + if args.update_method == "nccl2": + model.apply_collective_grads() + + opt.step() + opt.clear_grad() + print_to_out(out_losses) + def runtime_main(test_class): parser = argparse.ArgumentParser(description='Run dist test.') @@ -687,7 +731,8 @@ class TestDistBase(unittest.TestCase): envs['COVERAGE_FILE'] = os.getenv('COVERAGE_FILE', '') cmd += " -m coverage run --branch -p" - cmd += " %s --role trainer --lr %f" % (model, self._lr) + cmd += " %s --role trainer --update_method local --lr %f" % (model, + self._lr) if batch_size != DEFAULT_BATCH_SIZE: cmd += " --batch_size %d" % batch_size @@ -850,6 +895,7 @@ class TestDistBase(unittest.TestCase): if self.__use_cuda: tr_cmd += " --use_cuda" env.update({ + "FLAGS_selected_gpus": "{}".format(0), "CUDA_VISIBLE_DEVICES": "{}".format(trainer_id % 2), "PADDLE_TRAINERS_NUM": "{}".format(trainer_num), "PADDLE_TRAINER_ID": "{}".format(trainer_id), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py index b506f179143412e2bdb5d9eda511d90a0a3eea6d..e2336caac1c07f555280b82ba8fcfa7e5ec7f5b8 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ctr.py @@ -156,40 +156,5 @@ class TestDistCtrHalfAsync2x2(TestFleetBase): "dist_fleet_ctr.py", delta=1e-5, check_error_log=True) -class TestDistCtrPsGpuPyreaderAsync2x2(TestFleetBase): - def _setup_config(self): - self._mode = "async" - self._reader = "pyreader" - - def check_with_place(self, - model_file, - delta=1e-3, - check_error_log=False, - need_envs={}): - required_envs = { - "PATH": os.getenv("PATH", ""), - "PYTHONPATH": os.getenv("PYTHONPATH", ""), - "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), - "FLAGS_rpc_deadline": "30000", # 5sec to fail fast - "http_proxy": "", - "FLAGS_communicator_send_queue_size": "2", - "FLAGS_communicator_max_merge_var_num": "2", - "CPU_NUM": "2", - "SAVE_MODEL": "1" - } - - required_envs.update(need_envs) - - if check_error_log: - required_envs["GLOG_v"] = "3" - required_envs["GLOG_logtostderr"] = "1" - - tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) - - def test_dist_train(self): - self.check_with_place( - "dist_fleet_ctr_ps_gpu.py", delta=1e-5, check_error_log=True) - - if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py index 0fe7c386c1eeb751f34cf681778132310c304d51..7d18e935f58b6588adbef913c10d3ad497f07b53 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_geo.py @@ -21,7 +21,7 @@ import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory from test_dist_fleet_base import TestFleetBase -from dist_simnet_bow import 
train_network +from dist_fleet_simnet_bow import train_network class TestDistGeoCtr_2x2(TestFleetBase): @@ -72,7 +72,7 @@ class TestGeoSgdTranspiler(unittest.TestCase): strategy = StrategyFactory.create_geo_strategy(5) - avg_cost, _, _ = train_network(batch_size, is_distribute, is_sparse) + avg_cost, _, _, _ = train_network(batch_size, is_distribute, is_sparse) optimizer = fluid.optimizer.SGD(0.1) optimizer = fleet.distributed_optimizer(optimizer, strategy) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py index 46616f3dde486e61488d6852ca9efc37a066ab0b..3c68af474cf7cae96a9fa62688460f84123438f5 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_grad_clip.py @@ -21,7 +21,7 @@ import paddle.fluid.incubate.fleet.base.role_maker as role_maker from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.transpiler.distribute_transpiler import DistributeTranspilerConfig from test_dist_fleet_base import TestFleetBase -from dist_simnet_bow import train_network +from dist_fleet_simnet_bow import train_network @unittest.skip(reason="Skip unstable ut, add it after PR 22957 merged") @@ -44,7 +44,7 @@ class TestDistGeoClipByGlobalNormTranspiler(unittest.TestCase): strategy.geo_sgd_mode = True strategy.geo_sgd_need_push_nums = 5 - avg_cost, _, _ = train_network(batch_size, is_distribute, is_sparse) + avg_cost, _, _, _ = train_network(batch_size, is_distribute, is_sparse) fluid.clip.set_gradient_clip( clip=fluid.clip.GradientClipByGlobalNorm(2.0)) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py index c3ffd50dc8da16f4a19c8da5383fe7f763aa7a72..02a739c060cd2bd58ecec4d7dc65b65e8a3a35a7 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_ctr.py @@ -36,13 +36,45 @@ class TestDistHeterDatasetAsync2x2(TestFleetHeterBase): "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), "FLAGS_rpc_deadline": "5000", # 5sec to fail fast "http_proxy": "", - "CPU_NUM": "1" + "CPU_NUM": "3" } required_envs.update(need_envs) if check_error_log: - required_envs["GLOG_v"] = "4" + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + self.check_with_place( + "dist_fleet_heter_ctr.py", delta=1e-5, check_error_log=True) + + +class TestDistHeterPyreaderAsync2x2(TestFleetHeterBase): + def _setup_config(self): + self._mode = "async" + self._reader = "pyreader" + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "CPU_NUM": "3" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" required_envs["GLOG_logtostderr"] = "1" tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py new file mode 100644 index 
0000000000000000000000000000000000000000..ec34993905e3cfc4603ac48987a690b7fa8a5439 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_simnet.py @@ -0,0 +1,56 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import unittest +import tempfile +from test_dist_fleet_base import TestFleetBase + + +class TestDistSimnetASync2x2(TestFleetBase): + def _setup_config(self): + self._mode = "async" + self._reader = "pyreader" + + def check_with_place(self, + model_file, + delta=1e-3, + check_error_log=False, + need_envs={}): + required_envs = { + "PATH": os.getenv("PATH", ""), + "PYTHONPATH": os.getenv("PYTHONPATH", ""), + "LD_LIBRARY_PATH": os.getenv("LD_LIBRARY_PATH", ""), + "FLAGS_rpc_deadline": "5000", # 5sec to fail fast + "http_proxy": "", + "CPU_NUM": "2" + } + + required_envs.update(need_envs) + + if check_error_log: + required_envs["GLOG_v"] = "3" + required_envs["GLOG_logtostderr"] = "1" + + tr0_losses, tr1_losses = self._run_cluster(model_file, required_envs) + + def test_dist_train(self): + self.check_with_place( + "dist_fleet_simnet_bow.py", delta=1e-5, check_error_log=True) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py b/python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py deleted file mode 100644 index 3189f092413c1f6f1526a5ca66b27f91c95082b1..0000000000000000000000000000000000000000 --- a/python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-from __future__ import print_function - -import os -import unittest - -from test_dist_base import TestDistBase - -import os -flag_name = os.path.splitext(__file__)[0] - - -class TestDistSimnetBowDense2x2(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '0', - 'IS_SELF_CONTAINED_LR': '1' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=1e-5, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -class TestDistSimnetBow2x2DenseAsync(TestDistBase): - def _setup_config(self): - self._sync_mode = False - self._enforce_place = "CPU" - - # FIXME(typhoonzero): fix async tests later - def notest_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '0', - 'IS_SELF_CONTAINED_LR': '1', - } - self.check_with_place( - "dist_simnet_bow.py", - delta=100, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -class TestDistSimnetBowSparse2x2(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '1', - 'IS_SELF_CONTAINED_LR': '1' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=1e-5, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -class TestDistSimnetBow2x2SparseAsync(TestDistBase): - def _setup_config(self): - self._sync_mode = False - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '1', - 'IS_SELF_CONTAINED_LR': '1' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=100, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -# FIXME(tangwei): Learningrate variable is not created on pserver. 
-class TestDistSimnetBow2x2LookupTableSync(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '1', - 'IS_SELF_CONTAINED_LR': '1' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=1e-5, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -class TestDistSimnetBow2x2LookupTableAsync(TestDistBase): - def _setup_config(self): - self._sync_mode = False - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '1', - 'IS_SELF_CONTAINED_LR': '1' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=100, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -class TestDistSimnetBow2x2LookupTableNotContainLRSync(TestDistBase): - def _setup_config(self): - self._sync_mode = True - self._enforce_place = "CPU" - - def test_simnet_bow(self): - need_envs = { - "IS_DISTRIBUTED": '0', - "IS_SPARSE": '1', - 'IS_SELF_CONTAINED_LR': '0' - } - self.check_with_place( - "dist_simnet_bow.py", - delta=1e-5, - check_error_log=True, - need_envs=need_envs, - log_name=flag_name) - - -if __name__ == "__main__": - unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_distribution.py b/python/paddle/fluid/tests/unittests/test_distribution.py index 533ad9604cf0d879371796fb197e61e931fb479f..47a1c407230527d53327ba57d7b5d7a979bd7d49 100644 --- a/python/paddle/fluid/tests/unittests/test_distribution.py +++ b/python/paddle/fluid/tests/unittests/test_distribution.py @@ -40,8 +40,11 @@ class DistributionNumpy(): class UniformNumpy(DistributionNumpy): def __init__(self, low, high): - self.low = np.array(low).astype('float32') - self.high = np.array(high).astype('float32') + self.low = np.array(low) + self.high = np.array(high) + if str(self.low.dtype) not in ['float32', 'float64']: + self.low = self.low.astype('float32') + self.high = self.high.astype('float32') def sample(self, shape): shape = tuple(shape) + (self.low + self.high).shape @@ -49,13 +52,13 @@ class UniformNumpy(DistributionNumpy): (self.high - self.low)) def log_prob(self, value): - lb = np.less(self.low, value).astype('float32') - ub = np.less(value, self.high).astype('float32') + lb = np.less(self.low, value).astype(self.low.dtype) + ub = np.less(value, self.high).astype(self.low.dtype) return np.log(lb * ub) - np.log(self.high - self.low) def probs(self, value): - lb = np.less(self.low, value).astype('float32') - ub = np.less(value, self.high).astype('float32') + lb = np.less(self.low, value).astype(self.low.dtype) + ub = np.less(value, self.high).astype(self.low.dtype) return (lb * ub) / (self.high - self.low) def entropy(self): @@ -64,8 +67,11 @@ class UniformNumpy(DistributionNumpy): class NormalNumpy(DistributionNumpy): def __init__(self, loc, scale): - self.loc = np.array(loc).astype('float32') - self.scale = np.array(scale).astype('float32') + self.loc = np.array(loc) + self.scale = np.array(scale) + if str(self.loc.dtype) not in ['float32', 'float64']: + self.loc = self.loc.astype('float32') + self.scale = self.scale.astype('float32') def sample(self, shape): shape = tuple(shape) + (self.loc + self.scale).shape @@ -83,8 +89,8 @@ class NormalNumpy(DistributionNumpy): (2. * var)) / (math.sqrt(2 * math.pi) * self.scale) def entropy(self): - return 0.5 + 0.5 * np.log(np.array(2. * math.pi).astype( - 'float32')) + np.log(self.scale) + return 0.5 + 0.5 * np.log( + np.array(2. 
* math.pi).astype(self.loc.dtype)) + np.log(self.scale) def kl_divergence(self, other): var_ratio = (self.scale / other.scale) @@ -94,724 +100,571 @@ class NormalNumpy(DistributionNumpy): return 0.5 * (var_ratio + t1 - 1 - np.log(var_ratio)) -class DistributionTest(unittest.TestCase): - def setUp(self, use_gpu=False): +class UniformTest(unittest.TestCase): + def setUp(self, use_gpu=False, batch_size=5, dims=6): self.use_gpu = use_gpu if not use_gpu: - place = fluid.CPUPlace() + self.place = fluid.CPUPlace() self.gpu_id = -1 else: - place = fluid.CUDAPlace(0) + self.place = fluid.CUDAPlace(0) self.gpu_id = 0 - self.executor = fluid.Executor(place) - - def build_normal_common_net(self, batch_size, dims, sample_shape, loc_float, - scale_float, other_loc_float, other_scale_float, - scale_np, other_scale_np, loc_np, other_loc_np, - loc, scale, other_loc, other_scale, values): - """Generate Normal object and get the output of its methods including - ``sample``, ``entropy``, ``log_prob``, ``probs`` and ``kl_divergence``. - Parameters ``loc`` and ``scale`` have different data types to test different situations. - - Args: - batch_size(int): The first dimension of the shape of parameters(loc and scale). - dims(int): The second dimension of the shape of parameters. - sample_shape(int): The sample value used in ``sample`` method. - loc_float(float): Generated in function ``get_normal_random_input``, loc is a float number. - scale_float(float): Generated in function ``get_normal_random_input``, scale is a float number. - other_loc_float(float): Generated in function ``get_normal_random_input``, other_loc is a - float number. It is the first parameter in another Normal object used in ``kl_divergence`` - method. - other_scale_float(float): Generated in function ``get_normal_random_input``, other_scale is a - float number. It is the second parameter in another Normal object used in ``kl_divergence`` - method. - scale_np(numpy.ndarray): Generated in function ``get_normal_random_input``, An numpy array - whose shape is [batch_size, dims]. - other_scale_np(numpy.ndarray): Generated in function ``get_normal_random_input``, other_scale_np - is an numpy array. It is the second parameter in another Normal object used in ``kl_divergence`` - method. - loc_np(numpy.ndarray): Generated in function ``get_normal_random_input``, An numpy array - whose shape is [batch_size, dims]. - other_loc_np(numpy.ndarray): Generated in function ``get_normal_random_input``, other_loc_np - is an numpy array. It is the first parameter in another Normal object used in ``kl_divergence`` - method. - loc(Tensor): In dynamic mode, loc is generated in ``build_normal_dygraph``, it's a Tensor filled - with ``loc_np`` data. In static mode, loc is generated in ``build_normal_static``, ``layers.data`` - method is used to get a Placeholder whose shape is [dims]. - scale(Tensor): In dynamic mode, scale is generated in ``build_normal_dygraph``, it's a Tensor filled - with ``scale_np`` data. In static mode, scale is generated in ``build_normal_static``, ``layers.data`` - method is used to get a Placeholder whose shape is [dims]. - other_loc(Tensor): In dynamic mode, other_loc is generated in ``build_normal_dygraph``, it's a Tensor - filled with ``other_loc_np`` data. In static mode, other_loc is generated in ``build_normal_static``, - ``layers.data`` method is used to get a Placeholder whose shape is [dims]. It is the first parameter - in another Normal object used in ``kl_divergence`` method. 
- other_scale(Tensor): In dynamic mode, other_scale is generated in ``build_normal_dygraph``, it's a Tensor - filled with ``other_scale_np`` data. In static mode, other_scale is generated in ``build_normal_static``, - ``layers.data`` method is used to get a Placeholder whose shape is [dims]. It is the second parameter - in another Normal object used in ``kl_divergence`` method. - values(Tensor): In dynamic mode, values is generated in ``build_normal_dygraph``, it's a Tensor filled with - ``values_np`` data. In static mode, values is generated in ``build_normal_static``, ``layers.data`` - method is used to get a Placeholder whose shape is [dims]. - - Returns: - List: The elements of the list are the output of sample, entropy, log_prob, probs, kl_divergence methods. - The inputs' type of these methods can be float, np.ndarray and Tensor. And broadcast will be considered. - - """ - normal_int = Normal(int(loc_float), int(scale_float)) - normal_float = Normal(loc_float, scale_float) - other_normal_float = Normal(other_loc_float, other_scale_float) - - normal_float_np_broadcast = Normal(loc_float, scale_np) - other_normal_float_np_broadcast = Normal(other_loc_float, - other_scale_np) - - normal_np = Normal(loc_np, scale_np) - other_normal_np = Normal(other_loc_np, other_scale_np) - - normal_variable = Normal(loc, scale) - other_normal_variable = Normal(other_loc, other_scale) - - sample_int = normal_int.sample([batch_size, dims]) - sample_float = normal_float.sample([batch_size, dims]) - sample_float_np_broadcast = normal_float_np_broadcast.sample( - [batch_size, dims]) - sample_np = normal_np.sample([batch_size, dims]) - sample_variable = normal_variable.sample([batch_size, dims]) - - sample_int_diff = normal_int.sample([sample_shape]) - sample_float_diff = normal_float.sample([sample_shape]) - sample_float_np_broadcast_diff = normal_float_np_broadcast.sample( - [sample_shape]) - sample_np_diff = normal_np.sample([sample_shape]) - sample_variable_diff = normal_variable.sample([sample_shape]) - - entropy_int = normal_int.entropy() - entropy_float = normal_float.entropy() - entropy_float_np_broadcast = normal_float_np_broadcast.entropy() - entropy_np = normal_np.entropy() - entropy_variable = normal_variable.entropy() - - lp_float_np_broadcast = normal_float_np_broadcast.log_prob(values) - lp_np = normal_np.log_prob(values) - lp_variable = normal_variable.log_prob(values) - - p_float_np_broadcast = normal_float_np_broadcast.probs(values) - p_np = normal_np.probs(values) - p_variable = normal_variable.probs(values) - - kl_float = normal_float.kl_divergence(other_normal_float) - kl_float_np_broadcast = normal_float_np_broadcast.kl_divergence( - other_normal_float_np_broadcast) - kl_np = normal_np.kl_divergence(other_normal_np) - kl_variable = normal_variable.kl_divergence(other_normal_variable) - - fetch_list = [ - sample_int, sample_float, sample_float_np_broadcast, sample_np, - sample_variable, sample_int_diff, sample_float_diff, - sample_float_np_broadcast_diff, sample_np_diff, - sample_variable_diff, entropy_int, entropy_float, - entropy_float_np_broadcast, entropy_np, entropy_variable, - lp_float_np_broadcast, lp_np, lp_variable, p_float_np_broadcast, - p_np, p_variable, kl_float, kl_float_np_broadcast, kl_np, - kl_variable - ] - return fetch_list - - def build_normal_static(self, test_program, batch_size, dims, sample_shape, - loc_float, scale_float, other_loc_float, - other_scale_float, scale_np, other_scale_np, loc_np, - other_loc_np, values_np): - """ - In static mode, generate feed data 
of Normal network, and get output fetch_list using - ``build_normal_common_net``. - - Args: - test_program: In static mode, the Program object. - other args can refer to function ``build_normal_common_net``. - - Returns: - feed_vars: The feed data of Normal network in static mode. - fetch_list: The output is generated by function ``build_normal_common_net``. - """ - with fluid.program_guard(test_program): - loc = layers.data(name='loc', shape=[dims], dtype='float32') - scale = layers.data(name='scale', shape=[dims], dtype='float32') - - other_loc = layers.data( - name='other_loc', shape=[dims], dtype='float32') - other_scale = layers.data( - name='other_scale', shape=[dims], dtype='float32') - values = layers.data(name='values', shape=[dims], dtype='float32') + self.init_numpy_data(batch_size, dims) - fetch_list = self.build_normal_common_net( - batch_size, dims, sample_shape, loc_float, scale_float, - other_loc_float, other_scale_float, scale_np, other_scale_np, - loc_np, other_loc_np, loc, scale, other_loc, other_scale, - values) + paddle.disable_static(self.place) + self.init_dynamic_data(batch_size, dims) - feed_vars = { - 'loc': loc_np, - 'scale': scale_np, - 'other_loc': other_loc_np, - 'other_scale': other_scale_np, - 'values': values_np - } - return feed_vars, fetch_list - - def build_normal_dygraph(self, batch_size, dims, sample_shape, loc_float, - scale_float, other_loc_float, other_scale_float, - scale_np, other_scale_np, loc_np, other_loc_np, - values_np): - """ - In dynamic mode, generate input data of Normal network, and get output fetch_list using - ``build_normal_common_net``. - - Args: - refer to function ``build_normal_common_net``. - - Returns: - fetch_list_numpy: The output is generated by function ``build_normal_common_net``. Transform - these tensor to numpy.ndarray. - """ - loc = paddle.to_tensor(loc_np) - scale = paddle.to_tensor(scale_np) - other_loc = paddle.to_tensor(other_loc_np) - other_scale = paddle.to_tensor(other_scale_np) - values = paddle.to_tensor(values_np) - - fetch_list = self.build_normal_common_net( - batch_size, dims, sample_shape, loc_float, scale_float, - other_loc_float, other_scale_float, scale_np, other_scale_np, - loc_np, other_loc_np, loc, scale, other_loc, other_scale, values) - fetch_list_numpy = [t.numpy() for t in fetch_list] - return fetch_list_numpy - - def get_normal_random_input(self, batch_size, dims): - """ - Generate input data ``loc`` and ``scale`` used in Normal network. - - Args: - refer to function ``build_normal_common_net``. - - Returns: - List: Different data type of ``loc`` and ``scale``, including float, numpy.ndarray. - By the way, ``other_loc`` and ``other_scale`` are used in ``kl_divergence`` method. - refer to ``args`` in function ``build_normal_common_net``. 
- """ - loc_np = np.random.randn(batch_size, dims).astype('float32') - other_loc_np = np.random.randn(batch_size, dims).astype('float32') - - loc_float = (np.random.ranf() - 0.5) * 4 - scale_float = (np.random.ranf() - 0.5) * 4 - while scale_float < 0: - scale_float = (np.random.ranf() - 0.5) * 4 - - other_loc_float = (np.random.ranf() - 0.5) * 4 - other_scale_float = (np.random.ranf() - 0.5) * 4 - while other_scale_float < 0: - other_scale_float = (np.random.ranf() - 0.5) * 4 - - scale_np = np.random.randn(batch_size, dims).astype('float32') - other_scale_np = np.random.randn(batch_size, dims).astype('float32') - values_np = np.random.randn(batch_size, dims).astype('float32') - - while not np.all(scale_np > 0): - scale_np = np.random.randn(batch_size, dims).astype('float32') - while not np.all(other_scale_np > 0): - other_scale_np = np.random.randn(batch_size, dims).astype('float32') - return [ - loc_np, other_loc_np, loc_float, scale_float, other_loc_float, - other_scale_float, scale_np, other_scale_np, values_np - ] - - def compare_normal_with_numpy(self, - data_list, - output_list, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Compare the outputs of Normal's methods in paddle and numpy. If the outputs are not consistent, - raise errors. - - Args: - data_list: Input data generated by function ``get_normal_random_input``. - output_list: The outputs of Normal's methods in static or dynamic mode. - batch_size(int): The first dimension of the shape of parameters(loc and scale). - dims(int): The second dimension of the shape of parameters. - sample_shape(int): The sample value used in ``sample`` method. - tolerance(float): The tolerance of the error. - """ - loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list - - np_normal_int = NormalNumpy(int(loc_float), int(scale_float)) - np_normal_float = NormalNumpy(loc_float, scale_float) - np_other_normal_float = NormalNumpy(other_loc_float, other_scale_float) - np_normal_float_np_broadcast = NormalNumpy(loc_float, scale_np) - np_other_normal_float_np_broadcast = NormalNumpy(other_loc_float, - other_scale_np) - np_normal = NormalNumpy(loc_np, scale_np) - np_other_normal = NormalNumpy(other_loc_np, other_scale_np) - - gt_sample_int = np_normal_int.sample([batch_size, dims]) - gt_sample_float = np_normal_float.sample([batch_size, dims]) - gt_sample_float_np_broadcast = np_normal_float_np_broadcast.sample( - [batch_size, dims]) - gt_sample_np = np_normal.sample([batch_size, dims]) - - gt_sample_int_diff = np_normal_int.sample([sample_shape]) - gt_sample_float_diff = np_normal_float.sample([sample_shape]) - gt_sample_float_np_broadcast_diff = np_normal_float_np_broadcast.sample( - [sample_shape]) - gt_sample_np_diff = np_normal.sample([sample_shape]) - - gt_entropy_int = np_normal_int.entropy() - gt_entropy_float = np_normal_float.entropy() - gt_entropy_float_np_broadcast = np_normal_float_np_broadcast.entropy() - gt_entropy = np_normal.entropy() - gt_lp_float_np_broadcast = np_normal_float_np_broadcast.log_prob( - values_np) - gt_lp = np_normal.log_prob(values_np) - gt_p_float_np_broadcast = np_normal_float_np_broadcast.probs(values_np) - gt_p = np_normal.probs(values_np) - gt_kl_float = np_normal_float.kl_divergence(np_other_normal_float) - gt_kl_float_np_broadcast = np_normal_float_np_broadcast.kl_divergence( - np_other_normal_float_np_broadcast) - gt_kl = np_normal.kl_divergence(np_other_normal) - - [ - output_sample_int, output_sample_float, - 
output_sample_float_np_broadcast, output_sample_np, - output_sample_variable, output_sample_int_diff, - output_sample_float_diff, output_sample_float_np_broadcast_diff, - output_sample_np_diff, output_sample_variable_diff, - output_entropy_int, output_entropy_float, - output_entropy_float_np_broadcast, output_entropy_np, - output_entropy_variable, output_lp_float_np_broadcast, output_lp_np, - output_lp_variable, output_p_float_np_broadcast, output_p_np, - output_p_variable, output_kl_float, output_kl_float_np_broadcast, - output_kl_np, output_kl_variable - ] = output_list - - np.testing.assert_equal(output_sample_int.shape, gt_sample_int.shape) - np.testing.assert_equal(output_sample_float.shape, - gt_sample_float.shape) - np.testing.assert_equal(output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape) - np.testing.assert_equal(output_sample_np.shape, gt_sample_np.shape) - np.testing.assert_equal(output_sample_variable.shape, - gt_sample_np.shape) - np.testing.assert_equal(output_sample_int_diff.shape, - gt_sample_int_diff.shape) - np.testing.assert_equal(output_sample_float_diff.shape, - gt_sample_float_diff.shape) - np.testing.assert_equal(output_sample_float_np_broadcast_diff.shape, - gt_sample_float_np_broadcast_diff.shape) - np.testing.assert_equal(output_sample_np_diff.shape, - gt_sample_np_diff.shape) - np.testing.assert_equal(output_sample_variable_diff.shape, - gt_sample_np_diff.shape) - np.testing.assert_allclose( - output_entropy_int, gt_entropy_int, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_p_float_np_broadcast, - gt_p_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_p_np, gt_p, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_p_variable, gt_p, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_float, gt_kl_float, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_kl_float_np_broadcast, - gt_kl_float_np_broadcast, - rtol=tolerance, - atol=tolerance) + paddle.enable_static() + self.test_program = fluid.Program() + self.executor = fluid.Executor(self.place) + self.init_static_data(batch_size, dims) + + def init_numpy_data(self, batch_size, dims): + # low ans high are 'float' + self.low_np = np.random.uniform(-2, 1) + self.high_np = np.random.uniform(1, 3) + self.values_np = np.array([1.0]).astype('float32') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_low = self.low_np + self.dynamic_high = self.high_np + self.dynamic_values = paddle.to_tensor(self.values_np) + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', 
shape=[], dtype='float32') + + def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): + sample, entropy, log_prob, probs = fetch_list + + np_uniform = UniformNumpy(self.low_np, self.high_np) + np_sample = np_uniform.sample([sample_shape]) + np_entropy = np_uniform.entropy() + np_lp = np_uniform.log_prob(self.values_np) + np_p = np_uniform.probs(self.values_np) + + np.testing.assert_equal(sample.shape, np_sample.shape) np.testing.assert_allclose( - output_kl_np, gt_kl, rtol=tolerance, atol=tolerance) + entropy, np_entropy, rtol=tolerance, atol=tolerance) np.testing.assert_allclose( - output_kl_variable, gt_kl, rtol=tolerance, atol=tolerance) - - def test_normal_distribution_static(self, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Test Normal's methods in static mode. - - Args: - refer to ``compare_normal_with_numpy`` function. - """ - test_program = fluid.Program() - data_list = self.get_normal_random_input(batch_size, dims) - loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list - - feed_vars, fetch_list = self.build_normal_static( - test_program, batch_size, dims, sample_shape, loc_float, - scale_float, other_loc_float, other_scale_float, scale_np, - other_scale_np, loc_np, other_loc_np, values_np) - self.executor.run(fluid.default_startup_program()) + log_prob, np_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(probs, np_p, rtol=tolerance, atol=tolerance) - output_list = self.executor.run(program=test_program, - feed=feed_vars, - fetch_list=fetch_list) - - self.compare_normal_with_numpy(data_list, output_list, batch_size, dims, - sample_shape, tolerance) - - def test_normal_distribution_dygraph(self, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Test Normal's methods in dynamic mode. - - Args: - refer to ``compare_normal_with_numpy`` function. 
- """ - paddle.disable_static() - data_list = self.get_normal_random_input(batch_size, dims) - loc_np, other_loc_np, loc_float, scale_float, other_loc_float, other_scale_float, scale_np, other_scale_np, values_np = data_list - - output_list = self.build_normal_dygraph( - batch_size, dims, sample_shape, loc_float, scale_float, - other_loc_float, other_scale_float, scale_np, other_scale_np, - loc_np, other_loc_np, values_np) - - self.compare_normal_with_numpy(data_list, output_list, batch_size, dims, - sample_shape, tolerance) + def test_uniform_distribution_dygraph(self, sample_shape=7, tolerance=1e-6): + paddle.disable_static(self.place) + uniform = Uniform(self.dynamic_low, self.dynamic_high) + sample = uniform.sample([sample_shape]).numpy() + entropy = uniform.entropy().numpy() + log_prob = uniform.log_prob(self.dynamic_values).numpy() + probs = uniform.probs(self.dynamic_values).numpy() + fetch_list = [sample, entropy, log_prob, probs] + + self.compare_with_numpy(fetch_list) + + def test_uniform_distribution_static(self, sample_shape=7, tolerance=1e-6): paddle.enable_static() + with fluid.program_guard(self.test_program): + uniform = Uniform(self.static_low, self.static_high) + sample = uniform.sample([sample_shape]) + entropy = uniform.entropy() + log_prob = uniform.log_prob(self.static_values) + probs = uniform.probs(self.static_values) + fetch_list = [sample, entropy, log_prob, probs] - def build_uniform_common_net(self, batch_size, dims, sample_shape, - low_float, high_float, high_np, low_np, - values_np, low, high, values): - """Generate Uniform object and get the output of its methods including ``sample``, ``entropy``, - ``log_prob`` and ``probs``. - Parameters ``low`` and ``high`` have different data types to test different situations. - - Args: - batch_size(int): The first dimension of the shape of parameters(low and high). - dims(int): The second dimension of the shape of parameters. - sample_shape(int): The sample value used in ``sample`` method. - low_float(float): Parameter ``low`` is a float number. - high_float(float): Parameter ``high`` is a float number. - high_np(numpy.ndarray): An numpy array whose shape is [batch_size, dims]. - low_np(numpy.ndarray): An numpy array whose shape is [batch_size, dims]. - values_np(numpy.ndarray): The input of ``log_prob`` and ``probs`` methods. An numpy array whose - shape is [batch_size, dims]. - low(Tensor): In dynamic mode, low is generated in ``build_uniform_dygraph``, it's a Tensor filled - with ``low_np`` data. In static mode, low is generated in ``build_uniform_static``. - high(Tensor): In dynamic mode, high is generated in ``build_uniform_dygraph``, it's a Tensor filled - with ``high_np`` data. In static mode, high is generated in ``build_uniform_static``. - values(Tensor): In dynamic mode, values is generated in ``build_uniform_dygraph``, it's a Tensor - filled with ``values_np`` data. In static mode, values is generated in ``build_uniform_static``. - - Returns: - List: The elements of the list are the output of sample, entropy, log_prob, probs methods. - The inputs' type of these methods can be float, np.ndarray and Tensor. And broadcast will be - considered. 
- - """ - uniform_int = Uniform(int(low_float), int(high_float)) - uniform_float = Uniform(low_float, high_float) - uniform_float_np_broadcast = Uniform(low_float, high_np) - uniform_np = Uniform(low_np, high_np) - uniform_variable = Uniform(low, high) - - sample_int = uniform_int.sample([batch_size, dims]) - sample_float = uniform_float.sample([batch_size, dims]) - sample_float_np_broadcast = uniform_float_np_broadcast.sample( - [batch_size, dims]) - sample_np = uniform_np.sample([batch_size, dims]) - sample_variable = uniform_variable.sample([batch_size, dims]) - - sample_int_diff = uniform_int.sample([sample_shape]) - sample_float_diff = uniform_float.sample([sample_shape]) - sample_float_np_broadcast_diff = uniform_float_np_broadcast.sample( - [sample_shape]) - sample_np_diff = uniform_np.sample([sample_shape]) - sample_variable_diff = uniform_variable.sample([sample_shape]) - - entropy_int = uniform_int.entropy() - entropy_float = uniform_float.entropy() - entropy_float_np_broadcast = uniform_float_np_broadcast.entropy() - entropy_np = uniform_np.entropy() - entropy_variable = uniform_variable.entropy() - - lp_float_np_broadcast = uniform_float_np_broadcast.log_prob(values) - lp_np = uniform_np.log_prob(values) - lp_variable = uniform_variable.log_prob(values) - - p_float_np_broadcast = uniform_float_np_broadcast.probs(values) - p_np = uniform_np.probs(values) - p_variable = uniform_variable.probs(values) - - fetch_list = [ - sample_int, sample_float, sample_float_np_broadcast, sample_np, - sample_variable, sample_int_diff, sample_float_diff, - sample_float_np_broadcast_diff, sample_np_diff, - sample_variable_diff, entropy_int, entropy_float, - entropy_float_np_broadcast, entropy_np, entropy_variable, - lp_float_np_broadcast, lp_np, lp_variable, p_float_np_broadcast, - p_np, p_variable - ] - return fetch_list - - def build_uniform_static(self, test_program, batch_size, dims, sample_shape, - low_float, high_float, high_np, low_np, values_np): - """ - In static mode, generate feed data of Uniform network, and get output fetch_list using - ``build_uniform_common_net``. - - Args: - test_program: In static mode, the Program object. - other args can refer to function ``build_uniform_common_net``. - - Returns: - feed_vars: The feed data of Uniform network in static mode. - fetch_list: The output is generated by function ``build_uniform_common_net``. - """ - with fluid.program_guard(test_program): - low = layers.data(name='low', shape=[dims], dtype='float32') - high = layers.data(name='high', shape=[dims], dtype='float32') - - values = layers.data(name='values', shape=[dims], dtype='float32') - - fetch_list = self.build_uniform_common_net( - batch_size, dims, sample_shape, low_float, high_float, high_np, - low_np, values_np, low, high, values) - - feed_vars = {'low': low_np, 'high': high_np, 'values': values_np} - return feed_vars, fetch_list - - def build_uniform_dygraph(self, batch_size, dims, sample_shape, low_float, - high_float, high_np, low_np, values_np): - """ - In dynamic mode, generate input data of Uniform network, and get output fetch_list using - ``build_uniform_common_net``. - - Args: - refer to function ``build_uniform_common_net``. - - Returns: - fetch_list_numpy: The output is generated by function ``build_uniform_common_net``. Transform - these tensor to numpy.ndarray. 
- """ - low = paddle.to_tensor(low_np) - high = paddle.to_tensor(high_np) - values = paddle.to_tensor(values_np) - - fetch_list = self.build_uniform_common_net( - batch_size, dims, sample_shape, low_float, high_float, high_np, - low_np, values_np, low, high, values) - fetch_list_numpy = [t.numpy() for t in fetch_list] - return fetch_list_numpy - - def compare_uniform_with_numpy(self, - data_list, - output_list, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Compare the outputs of Uniform's methods in paddle and numpy. If the outputs are not consistent, - raise errors. - - Args: - data_list: Input data including float and numpy.ndarray type of ``low`` and ``high`` parameters. - output_list: The outputs of Uniform's methods in static or dynamic mode. - batch_size(int): The first dimension of the shape of parameters(low and high). - dims(int): The second dimension of the shape of parameters. - sample_shape(int): The sample value used in ``sample`` method. - tolerance(float): The tolerance of the error. - """ - [low_np, low_float, high_float, high_np, values_np] = data_list - - np_uniform_int = UniformNumpy(int(low_float), int(high_float)) - np_uniform_float = UniformNumpy(low_float, high_float) - np_uniform_float_np_broadcast = UniformNumpy(low_float, high_np) - np_uniform = UniformNumpy(low_np, high_np) - - gt_sample_int = np_uniform_int.sample([batch_size, dims]) - gt_sample_float = np_uniform_float.sample([batch_size, dims]) - gt_sample_float_np_broadcast = np_uniform_float_np_broadcast.sample( - [batch_size, dims]) - gt_sample_np = np_uniform.sample([batch_size, dims]) - gt_sample_int_diff = np_uniform_int.sample([sample_shape]) - gt_sample_float_diff = np_uniform_float.sample([sample_shape]) - gt_sample_float_np_broadcast_diff = np_uniform_float_np_broadcast.sample( - [sample_shape]) - gt_sample_np_diff = np_uniform.sample([sample_shape]) - gt_entropy_int = np_uniform_int.entropy() - gt_entropy_float = np_uniform_float.entropy() - gt_entropy_float_np_broadcast = np_uniform_float_np_broadcast.entropy() - gt_entropy = np_uniform.entropy() - gt_lp_float_np_broadcast = np_uniform_float_np_broadcast.log_prob( - values_np) - gt_lp = np_uniform.log_prob(values_np) - gt_p_float_np_broadcast = np_uniform_float_np_broadcast.probs(values_np) - gt_p = np_uniform.probs(values_np) - - [ - output_sample_int, output_sample_float, - output_sample_float_np_broadcast, output_sample_np, - output_sample_variable, output_sample_int_diff, - output_sample_float_diff, output_sample_float_np_broadcast_diff, - output_sample_np_diff, output_sample_variable_diff, - output_entropy_int, output_entropy_float, - output_entropy_float_np_broadcast, output_entropy_np, - output_entropy_variable, output_lp_float_np_broadcast, output_lp_np, - output_lp_variable, output_p_float_np_broadcast, output_p_np, - output_p_variable - ] = output_list - - np.testing.assert_equal(output_sample_int.shape, gt_sample_int.shape) - np.testing.assert_equal(output_sample_float.shape, - gt_sample_float.shape) - np.testing.assert_equal(output_sample_float_np_broadcast.shape, - gt_sample_float_np_broadcast.shape) - np.testing.assert_equal(output_sample_np.shape, gt_sample_np.shape) - np.testing.assert_equal(output_sample_variable.shape, - gt_sample_np.shape) - np.testing.assert_equal(output_sample_int_diff.shape, - gt_sample_int_diff.shape) - np.testing.assert_equal(output_sample_float_diff.shape, - gt_sample_float_diff.shape) - np.testing.assert_equal(output_sample_float_np_broadcast_diff.shape, - 
gt_sample_float_np_broadcast_diff.shape) - np.testing.assert_equal(output_sample_np_diff.shape, - gt_sample_np_diff.shape) - np.testing.assert_equal(output_sample_variable_diff.shape, - gt_sample_np_diff.shape) - np.testing.assert_allclose( - output_entropy_int, gt_entropy_int, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_float, - gt_entropy_float, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_float_np_broadcast, - gt_entropy_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_entropy_np, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_entropy_variable, gt_entropy, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_float_np_broadcast, - gt_lp_float_np_broadcast, - rtol=tolerance, - atol=tolerance) - np.testing.assert_allclose( - output_lp_np, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_lp_variable, gt_lp, rtol=tolerance, atol=tolerance) - np.testing.assert_allclose( - output_p_float_np_broadcast, - gt_p_float_np_broadcast, - rtol=tolerance, - atol=tolerance) + feed_vars = { + 'low': self.low_np, + 'high': self.high_np, + 'values': self.values_np + } + + self.executor.run(fluid.default_startup_program()) + fetch_list = self.executor.run(program=self.test_program, + feed=feed_vars, + fetch_list=fetch_list) + + self.compare_with_numpy(fetch_list) + + +class UniformTest2(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low ans high are 'int' + self.low_np = int(np.random.uniform(-2, 1)) + self.high_np = int(np.random.uniform(1, 3)) + self.values_np = np.array([1.0]).astype('float32') + + +class UniformTest3(UniformTest): + def init_numpy_data(self, batch_size, dims): + # test broadcast: low is float, high is numpy.ndarray with dtype 'float32'. + self.low_np = np.random.uniform(-2, 1) + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class UniformTest4(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are numpy.ndarray with dtype 'float32'. + self.low_np = np.random.randn(batch_size, dims).astype('float32') + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class UniformTest5(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are numpy.ndarray with dtype 'float64'. 
+ self.low_np = np.random.randn(batch_size, dims).astype('float64') + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float64') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_low = self.low_np + self.dynamic_high = self.high_np + self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float64') + + def init_static_data(self, batch_size, dims): + self.static_low = self.low_np + self.static_high = self.high_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float64') + + +class UniformTest6(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are Tensor with dtype 'VarType.FP32'. + self.low_np = np.random.randn(batch_size, dims).astype('float32') + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_low = paddle.to_tensor(self.low_np) + self.dynamic_high = paddle.to_tensor(self.high_np) + self.dynamic_values = paddle.to_tensor(self.values_np) + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_low = layers.data( + name='low', shape=[dims], dtype='float32') + self.static_high = layers.data( + name='high', shape=[dims], dtype='float32') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class UniformTest7(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are Tensor with dtype 'VarType.FP64'. + self.low_np = np.random.randn(batch_size, dims).astype('float64') + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float64') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_low = paddle.to_tensor(self.low_np, dtype='float64') + self.dynamic_high = paddle.to_tensor(self.high_np, dtype='float64') + self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float64') + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_low = layers.data( + name='low', shape=[dims], dtype='float64') + self.static_high = layers.data( + name='high', shape=[dims], dtype='float64') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float64') + + +class UniformTest8(UniformTest): + def init_numpy_data(self, batch_size, dims): + # low and high are Tensor with dtype 'VarType.FP64'. value's dtype is 'VarType.FP32'. 
+ self.low_np = np.random.randn(batch_size, dims).astype('float64') + self.high_np = np.random.uniform(-5.0, 5.0, + (batch_size, dims)).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_low = paddle.to_tensor(self.low_np, dtype='float64') + self.dynamic_high = paddle.to_tensor(self.high_np, dtype='float64') + self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float32') + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_low = layers.data( + name='low', shape=[dims], dtype='float64') + self.static_high = layers.data( + name='high', shape=[dims], dtype='float64') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class NormalTest(unittest.TestCase): + def setUp(self, use_gpu=False, batch_size=2, dims=3): + self.use_gpu = use_gpu + if not use_gpu: + self.place = fluid.CPUPlace() + self.gpu_id = -1 + else: + self.place = fluid.CUDAPlace(0) + self.gpu_id = 0 + + self.init_numpy_data(batch_size, dims) + + paddle.disable_static(self.place) + self.init_dynamic_data(batch_size, dims) + + paddle.enable_static() + self.test_program = fluid.Program() + self.executor = fluid.Executor(self.place) + self.init_static_data(batch_size, dims) + + def init_numpy_data(self, batch_size, dims): + # loc ans scale are 'float' + self.loc_np = (np.random.ranf() - 0.5) * 4 + self.scale_np = (np.random.ranf() - 0.5) * 4 + while self.scale_np < 0: + self.scale_np = (np.random.ranf() - 0.5) * 4 + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = (np.random.ranf() - 0.5) * 4 + self.other_scale_np = (np.random.ranf() - 0.5) * 4 + while self.other_scale_np < 0: + self.other_scale_np = (np.random.ranf() - 0.5) * 4 + self.values_np = np.random.ranf(1).astype('float32') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_loc = self.loc_np + self.dynamic_scale = self.scale_np + self.dynamic_other_loc = self.other_loc_np + self.dynamic_other_scale = self.other_scale_np + self.dynamic_values = paddle.to_tensor(self.values_np) + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[], dtype='float32') + + def compare_with_numpy(self, fetch_list, sample_shape=7, tolerance=1e-6): + sample, entropy, log_prob, probs, kl = fetch_list + + np_normal = NormalNumpy(self.loc_np, self.scale_np) + np_sample = np_normal.sample([sample_shape]) + np_entropy = np_normal.entropy() + np_lp = np_normal.log_prob(self.values_np) + np_p = np_normal.probs(self.values_np) + np_other_normal = NormalNumpy(self.other_loc_np, self.other_scale_np) + np_kl = np_normal.kl_divergence(np_other_normal) + + np.testing.assert_equal(sample.shape, np_sample.shape) np.testing.assert_allclose( - output_p_np, gt_p, rtol=tolerance, atol=tolerance) + entropy, np_entropy, rtol=tolerance, atol=tolerance) np.testing.assert_allclose( - output_p_variable, gt_p, rtol=tolerance, atol=tolerance) - - def test_uniform_distribution_static(self, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Test Uniform's methods in static mode. - - Args: - refer to ``compare_uniform_with_numpy`` function. 
- """ - test_program = fluid.Program() - - low_np = np.random.randn(batch_size, dims).astype('float32') - low_float = np.random.uniform(-2, 1) - high_float = np.random.uniform(1, 3) - high_np = np.random.uniform(-5.0, 5.0, - (batch_size, dims)).astype('float32') - values_np = np.random.randn(batch_size, dims).astype('float32') - - data_list = [low_np, low_float, high_float, high_np, values_np] - - feed_vars, fetch_list = self.build_uniform_static( - test_program, batch_size, dims, sample_shape, low_float, high_float, - high_np, low_np, values_np) + log_prob, np_lp, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(probs, np_p, rtol=tolerance, atol=tolerance) + np.testing.assert_allclose(kl, np_kl, rtol=tolerance, atol=tolerance) - self.executor.run(fluid.default_startup_program()) + def test_normal_distribution_dygraph(self, sample_shape=7, tolerance=1e-6): + paddle.disable_static(self.place) + normal = Normal(self.dynamic_loc, self.dynamic_scale) + + sample = normal.sample([sample_shape]).numpy() + entropy = normal.entropy().numpy() + log_prob = normal.log_prob(self.dynamic_values).numpy() + probs = normal.probs(self.dynamic_values).numpy() + other_normal = Normal(self.dynamic_other_loc, self.dynamic_other_scale) + kl = normal.kl_divergence(other_normal).numpy() - # result calculated by paddle - output_list = self.executor.run(program=test_program, - feed=feed_vars, - fetch_list=fetch_list) - self.compare_uniform_with_numpy(data_list, output_list, batch_size, - dims, sample_shape, tolerance) - - def test_uniform_distribution_dygraph(self, - batch_size=2, - dims=3, - sample_shape=7, - tolerance=1e-6): - """ - Test Uniform's methods in dynamic mode. - - Args: - refer to ``compare_uniform_with_numpy`` function. - """ - paddle.disable_static() - - low_np = np.random.randn(batch_size, dims).astype('float32') - low_float = np.random.uniform(-2, 1) - high_float = np.random.uniform(1, 3) - high_np = np.random.uniform(-5.0, 5.0, - (batch_size, dims)).astype('float32') - values_np = np.random.randn(batch_size, dims).astype('float32') - - data_list = [low_np, low_float, high_float, high_np, values_np] - output_list = self.build_uniform_dygraph(batch_size, dims, sample_shape, - low_float, high_float, high_np, - low_np, values_np) - - self.compare_uniform_with_numpy(data_list, output_list, batch_size, - dims, sample_shape, tolerance) + fetch_list = [sample, entropy, log_prob, probs, kl] + self.compare_with_numpy(fetch_list) + + def test_normal_distribution_static(self, sample_shape=7, tolerance=1e-6): paddle.enable_static() + with fluid.program_guard(self.test_program): + normal = Normal(self.static_loc, self.static_scale) + + sample = normal.sample([sample_shape]) + entropy = normal.entropy() + log_prob = normal.log_prob(self.static_values) + probs = normal.probs(self.static_values) + other_normal = Normal(self.static_other_loc, + self.static_other_scale) + kl = normal.kl_divergence(other_normal) + + fetch_list = [sample, entropy, log_prob, probs, kl] + + feed_vars = { + 'loc': self.loc_np, + 'scale': self.scale_np, + 'values': self.values_np, + 'other_loc': self.other_loc_np, + 'other_scale': self.other_scale_np + } + + self.executor.run(fluid.default_startup_program()) + fetch_list = self.executor.run(program=self.test_program, + feed=feed_vars, + fetch_list=fetch_list) + + self.compare_with_numpy(fetch_list) + + +class NormalTest2(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc ans scale are 'int' + self.loc_np = int((np.random.ranf() - 0.5) * 8) + self.scale_np = 
int((np.random.ranf() - 0.5) * 8) + while self.scale_np < 0: + self.scale_np = int((np.random.ranf() - 0.5) * 8) + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = int((np.random.ranf() - 0.5) * 8) + self.other_scale_np = int((np.random.ranf() - 0.5) * 8) + while self.other_scale_np < 0: + self.other_scale_np = int((np.random.ranf() - 0.5) * 8) + self.values_np = np.random.ranf(1).astype('float32') + + +class NormalTest3(NormalTest): + def init_numpy_data(self, batch_size, dims): + # test broadcast: loc is float, scale is numpy.ndarray with dtype 'float32'. + self.loc_np = (np.random.ranf() - 0.5) * 4 + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = (np.random.ranf() - 0.5) * 4 + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class NormalTest4(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are numpy.ndarray with dtype 'float32'. + self.loc_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float32') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + + +class NormalTest5(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are numpy.ndarray with dtype 'float64'. 
+ self.loc_np = np.random.randn(batch_size, dims).astype('float64') + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float64') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float64') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_loc = self.loc_np + self.dynamic_scale = self.scale_np + self.dynamic_other_loc = self.other_loc_np + self.dynamic_other_scale = self.other_scale_np + self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float64') + + def init_static_data(self, batch_size, dims): + self.static_loc = self.loc_np + self.static_scale = self.scale_np + self.static_other_loc = self.other_loc_np + self.static_other_scale = self.other_scale_np + with fluid.program_guard(self.test_program): + self.static_values = layers.data( + name='values', shape=[dims], dtype='float64') + + +class NormalTest6(NormalTest): + def init_data(self, batch_size=2, dims=3): + # loc and scale are Tensor with dtype 'VarType.FP32'. + self.loc_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + self.loc = paddle.to_tensor(self.loc_np) + self.scale = paddle.to_tensor(self.scale_np) + self.values = paddle.to_tensor(self.values_np) + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float32') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + self.other_loc = paddle.to_tensor(self.other_loc_np) + self.other_scale = paddle.to_tensor(self.other_scale_np) + + def init_numpy_data(self, batch_size, dims): + # loc and scale are Tensor with dtype 'VarType.FP32'. 
+ self.loc_np = np.random.randn(batch_size, dims).astype('float32') + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float32') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float32') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float32') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_loc = paddle.to_tensor(self.loc_np) + self.dynamic_scale = paddle.to_tensor(self.scale_np) + self.dynamic_values = paddle.to_tensor(self.values_np) + self.dynamic_other_loc = paddle.to_tensor(self.other_loc_np) + self.dynamic_other_scale = paddle.to_tensor(self.other_scale_np) + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_loc = layers.data( + name='loc', shape=[dims], dtype='float32') + self.static_scale = layers.data( + name='scale', shape=[dims], dtype='float32') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + self.static_other_loc = layers.data( + name='other_loc', shape=[dims], dtype='float32') + self.static_other_scale = layers.data( + name='other_scale', shape=[dims], dtype='float32') + + +class NormalTest7(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are Tensor with dtype 'VarType.FP64'. + self.loc_np = np.random.randn(batch_size, dims).astype('float64') + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float64') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float64') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_loc = paddle.to_tensor(self.loc_np, dtype='float64') + self.dynamic_scale = paddle.to_tensor(self.scale_np, dtype='float64') + self.dynamic_values = paddle.to_tensor(self.values_np, dtype='float64') + self.dynamic_other_loc = paddle.to_tensor( + self.other_loc_np, dtype='float64') + self.dynamic_other_scale = paddle.to_tensor( + self.other_scale_np, dtype='float64') + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_loc = layers.data( + name='loc', shape=[dims], dtype='float64') + self.static_scale = layers.data( + name='scale', shape=[dims], dtype='float64') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float64') + self.static_other_loc = layers.data( + name='other_loc', shape=[dims], dtype='float64') + self.static_other_scale = layers.data( + name='other_scale', shape=[dims], dtype='float64') + + +class NormalTest8(NormalTest): + def init_numpy_data(self, batch_size, dims): + # loc and scale are Tensor with dtype 'VarType.FP64'. value's dtype is 'VarType.FP32'. 
+ self.loc_np = np.random.randn(batch_size, dims).astype('float64') + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + while not np.all(self.scale_np > 0): + self.scale_np = np.random.randn(batch_size, dims).astype('float64') + self.values_np = np.random.randn(batch_size, dims).astype('float32') + # used to construct another Normal object to calculate kl_divergence + self.other_loc_np = np.random.randn(batch_size, dims).astype('float64') + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + while not np.all(self.scale_np > 0): + self.other_scale_np = np.random.randn(batch_size, + dims).astype('float64') + + def init_dynamic_data(self, batch_size, dims): + self.dynamic_loc = paddle.to_tensor(self.loc_np, dtype='float64') + self.dynamic_scale = paddle.to_tensor(self.scale_np, dtype='float64') + self.dynamic_values = paddle.to_tensor(self.values_np) + self.dynamic_other_loc = paddle.to_tensor( + self.other_loc_np, dtype='float64') + self.dynamic_other_scale = paddle.to_tensor( + self.other_scale_np, dtype='float64') + + def init_static_data(self, batch_size, dims): + with fluid.program_guard(self.test_program): + self.static_loc = layers.data( + name='loc', shape=[dims], dtype='float64') + self.static_scale = layers.data( + name='scale', shape=[dims], dtype='float64') + self.static_values = layers.data( + name='values', shape=[dims], dtype='float32') + self.static_other_loc = layers.data( + name='other_loc', shape=[dims], dtype='float64') + self.static_other_scale = layers.data( + name='other_scale', shape=[dims], dtype='float64') class DistributionTestError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py index ceec1190279212fbe6f3f128bdd1397cdb9ea1a2..7b9e25e1d4ae8dbb8e4a03d93a7d9c0f9dd18ea6 100644 --- a/python/paddle/fluid/tests/unittests/test_dropout_op.py +++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py @@ -40,6 +40,23 @@ class TestDropoutOp(OpTest): self.check_grad(['X'], 'Out') +class TestDropoutOpInput1d(OpTest): + def setUp(self): + self.op_type = "dropout" + self.inputs = {'X': np.random.random((2000, )).astype("float32")} + self.attrs = {'dropout_prob': 0.0, 'fix_seed': True, 'is_test': False} + self.outputs = { + 'Out': self.inputs['X'], + 'Mask': np.ones((2000)).astype('uint8') + } + + def test_check_output(self): + self.check_output() + + def test_check_grad_normal(self): + self.check_grad(['X'], 'Out') + + class TestDropoutOp2(TestDropoutOp): def setUp(self): self.op_type = "dropout" @@ -436,6 +453,13 @@ class TestDropoutFAPIError(unittest.TestCase): self.assertRaises(ValueError, test_axis_max) + def test_axis_min(): + # minimum of axis should greater equal than 0 + x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") + paddle.nn.functional.dropout(x2, axis=[0, -1]) + + self.assertRaises(ValueError, test_axis_min) + def test_axis_len(): # length of axis should not greater than dimensions of x x2 = fluid.data(name='x2', shape=[3, 4, 5, 6], dtype="float32") @@ -648,9 +672,11 @@ class TestAlphaDropoutFAPI(unittest.TestCase): res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) res2 = paddle.nn.functional.alpha_dropout( x=input, p=0., training=False) + res3 = paddle.nn.functional.alpha_dropout(x=input, p=1.) 
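# A minimal dygraph sketch of the behaviour the new `res3` assertions encode:
# alpha_dropout with p=1.0 is expected to drop every element and return an
# all-zero tensor of the input's shape (the array name and shape below are
# illustrative only; every API called here already appears in this test file).
import numpy as np
import paddle

paddle.disable_static()
x_np = np.random.random([4, 4]).astype("float32")
out = paddle.nn.functional.alpha_dropout(x=paddle.to_tensor(x_np), p=1.0)
assert np.allclose(out.numpy(), np.zeros_like(x_np))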
in_np = np.random.random([40, 40]).astype("float32") res_np = in_np + res_np3 = np.zeros_like(in_np) exe = fluid.Executor(place) res_list = [res1, res2] @@ -659,6 +685,10 @@ class TestAlphaDropoutFAPI(unittest.TestCase): feed={"input": in_np}, fetch_list=[res]) self.assertTrue(np.allclose(fetches[0], res_np)) + fetches = exe.run(fluid.default_main_program(), + feed={"input": in_np}, + fetch_list=[res3]) + self.assertTrue(np.allclose(fetches[0], res_np3)) def test_static(self): for place in self.places: @@ -669,15 +699,18 @@ class TestAlphaDropoutFAPI(unittest.TestCase): with fluid.dygraph.guard(place): in_np = np.random.random([40, 40]).astype("float32") res_np = in_np + res_np3 = np.zeros_like(in_np) input = fluid.dygraph.to_variable(in_np) res1 = paddle.nn.functional.alpha_dropout(x=input, p=0.) res2 = paddle.nn.functional.alpha_dropout( x=input, p=0., training=False) + res3 = paddle.nn.functional.alpha_dropout(x=input, p=1.) res_list = [res1, res2] for res in res_list: self.assertTrue(np.allclose(res.numpy(), res_np)) + self.assertTrue(np.allclose(res3.numpy(), res_np3)) class TestAlphaDropoutFAPIError(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py index f5d8b4f704da8acd97475444346522f63d3724fd..cab6160d761004877896deea8d44ca02c9de2e1e 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mod_op.py @@ -220,6 +220,14 @@ class TestRemainderAPI(unittest.TestCase): z_expected = np.array([0, 1, 1, -1]) self.assertEqual(np.allclose(z_expected, z.numpy()), True) + np_x = np.array([-3, 3]) + np_y = np.array([[2, 3], [-2, -1]]) + x = paddle.to_tensor(np_x, dtype="int64") + y = paddle.to_tensor(np_y, dtype="int64") + z = x % y + z_expected = np.array([[1, 0], [-1, 0]]) + self.assertEqual(np.allclose(z_expected, z.numpy()), True) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 3475320eeebc55a14dd569410610b70ae35e65a3..43069470680c7d49071ce54bf3649962c56f06ea 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -350,6 +350,14 @@ class TestFillConstantOpError(unittest.TestCase): dtype='int16', out=x1) + self.assertRaises( + TypeError, + fluid.layers.fill_constant, + shape=[1.1], + value=5, + dtype='float32', + out=x1) + # The argument dtype of fill_constant_op must be one of bool, float16, #float32, float64, int32 or int64 x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") diff --git a/python/paddle/fluid/tests/unittests/test_fleet_base.py b/python/paddle/fluid/tests/unittests/test_fleet_base.py index 9e651dea24ba7f35f3785093da8ac73dde07be5a..4ced9841ee43e02a3d1e3f292bf97200dec29f5c 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_base.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_base.py @@ -18,6 +18,7 @@ import paddle.distributed.fleet as fleet import paddle.distributed.fleet.base.role_maker as role_maker import os import paddle.fluid as fluid +import numpy as np class TestFleetBase(unittest.TestCase): @@ -125,5 +126,110 @@ class TestFleetBase(unittest.TestCase): self.assertRaises(Exception, fleet.init_worker) +class TestFleetDygraph(unittest.TestCase): + def setUp(self): + os.environ[ + "PADDLE_TRAINER_ENDPOINTS"] = "127.0.0.1:36213,127.0.0.1:36214" 
+ os.environ["PADDLE_CURRENT_ENDPOINTS"] = "127.0.0.1:36213" + os.environ["PADDLE_TRAINERS_NUM"] = "2" + os.environ["PADDLE_TRAINER_ID"] = "0" + + def test_dygraph_method(self): + paddle.disable_static() + value = np.arange(26).reshape(2, 13).astype("float32") + a = fluid.dygraph.to_variable(value) + layer = paddle.nn.Linear(13, 5) + adam = paddle.optimizer.Adam( + learning_rate=0.01, parameters=layer.parameters()) + # remove init cause this UT cannot launch distributed task + adam = fleet.distributed_optimizer(adam) + dp_layer = fleet.distributed_model(layer) + lr = 0.001 + adam.set_lr(lr) + cur_lr = adam.get_lr() + assert (lr == cur_lr) + state_dict = adam.state_dict() + adam.set_state_dict(state_dict) + + +class TestFleetBaseSingleRunCollective(unittest.TestCase): + def setUp(self): + os.environ.pop("PADDLE_TRAINER_ENDPOINTS") + + def gen_data(self): + return { + "x": np.random.random(size=(128, 32)).astype('float32'), + "y": np.random.randint( + 2, size=(128, 1)).astype('int64') + } + + def test_single_run_collective_minimize(self): + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') + + fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') + prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') + cost = fluid.layers.cross_entropy(input=prediction, label=input_y) + avg_cost = paddle.mean(x=cost) + + fleet.init(is_collective=True) + optimizer = fluid.optimizer.SGD(learning_rate=0.001) + optimizer = fleet.distributed_optimizer(optimizer) + optimizer.minimize(avg_cost) + + place = fluid.CUDAPlace(0) if paddle.fluid.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + exe = fluid.Executor(place) + exe.run(paddle.static.default_startup_program()) + + for i in range(10): + cost_val = exe.run(feed=self.gen_data(), fetch_list=[avg_cost.name]) + print("cost of step[{}] = {}".format(i, cost_val)) + + +class TestFleetBaseSingleRunPS(unittest.TestCase): + def setUp(self): + os.environ.pop("PADDLE_PSERVERS_IP_PORT_LIST") + + def gen_data(self): + return { + "x": np.random.random(size=(128, 32)).astype('float32'), + "y": np.random.randint( + 2, size=(128, 1)).astype('int64') + } + + def test_single_run_ps_minimize(self): + input_x = paddle.static.data(name="x", shape=[-1, 32], dtype='float32') + input_y = paddle.static.data(name="y", shape=[-1, 1], dtype='int64') + + fc_1 = fluid.layers.fc(input=input_x, size=64, act='tanh') + prediction = fluid.layers.fc(input=fc_1, size=2, act='softmax') + cost = fluid.layers.cross_entropy(input=prediction, label=input_y) + avg_cost = paddle.mean(x=cost) + + fleet.init() + strategy = paddle.distributed.fleet.DistributedStrategy() + optimizer = fluid.optimizer.SGD(learning_rate=0.01) + optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) + optimizer.minimize(avg_cost) + if fleet.is_server(): + fleet.init_server() + fleet.run_server() + elif fleet.is_worker(): + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(paddle.static.default_startup_program()) + step = 100 + for i in range(step): + cost_val = exe.run(program=fluid.default_main_program(), + feed=self.gen_data(), + fetch_list=[avg_cost.name]) + print("worker_index: %d, step%d cost = %f" % + (fleet.worker_index(), i, cost_val[0])) + fleet.save_persistables(exe, "fleet_single_model/") + print("save fleet models done.") + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py 
b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py index 9eec73116cc283b58d3ee39cefb9256e12d4ef15..927c155ff1116a821a13730a9d2a779a7c68b254 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_graph_execution_meta_optimizer.py @@ -190,7 +190,7 @@ class TestFleetGraphExecutionMetaOptimizer(unittest.TestCase): avg_cost = paddle.fluid.layers.mean(x=cost) strategy = paddle.distributed.fleet.DistributedStrategy() - optimizer = paddle.optimizer.SGD(learning_rate=0.01) + optimizer = paddle.fluid.optimizer.SGD(learning_rate=0.01) optimizer = fleet.distributed_optimizer( optimizer, strategy=strategy) optimizer.minimize(avg_cost) diff --git a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py index fc668ce3493e96e0790af522a439367fe10455f3..dddc6811ef08bdf8504cb6b4fe09813336875b10 100644 --- a/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_gaussian_random_op.py @@ -239,24 +239,24 @@ class TestGaussianRandomAPI(unittest.TestCase): def test_default_dtype(self): paddle.disable_static() - def test_default_fp_16(): + def test_default_fp16(): paddle.framework.set_default_dtype('float16') - paddle.tensor.random.gaussian_random([2, 3]) + paddle.tensor.random.gaussian([2, 3]) - self.assertRaises(TypeError, test_default_fp_16) + self.assertRaises(TypeError, test_default_fp16) - def test_default_fp_32(): + def test_default_fp32(): paddle.framework.set_default_dtype('float32') - out = paddle.tensor.random.gaussian_random([2, 3]) + out = paddle.tensor.random.gaussian([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32) - def test_default_fp_64(): + def test_default_fp64(): paddle.framework.set_default_dtype('float64') - out = paddle.tensor.random.gaussian_random([2, 3]) + out = paddle.tensor.random.gaussian([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) - test_default_fp_64() - test_default_fp_32() + test_default_fp64() + test_default_fp32() paddle.enable_static() @@ -265,24 +265,24 @@ class TestStandardNormalDtype(unittest.TestCase): def test_default_dtype(self): paddle.disable_static() - def test_default_fp_16(): + def test_default_fp16(): paddle.framework.set_default_dtype('float16') paddle.tensor.random.standard_normal([2, 3]) - self.assertRaises(TypeError, test_default_fp_16) + self.assertRaises(TypeError, test_default_fp16) - def test_default_fp_32(): + def test_default_fp32(): paddle.framework.set_default_dtype('float32') out = paddle.tensor.random.standard_normal([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32) - def test_default_fp_64(): + def test_default_fp64(): paddle.framework.set_default_dtype('float64') out = paddle.tensor.random.standard_normal([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) - test_default_fp_64() - test_default_fp_32() + test_default_fp64() + test_default_fp32() paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py index 654e8d6f129e1ffe0dce59113ca88a16d348f210..a46b9b0ca78bf37e1c421a08a6fa8c5353c6d45d 100644 --- a/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_group_norm_op_v2.py @@ -35,24 +35,33 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v1(x): with 
fluid.dygraph.guard(p): - gn = fluid.dygraph.GroupNorm(channels=2, groups=2) + gn = fluid.dygraph.GroupNorm(channels=6, groups=2) y = gn(fluid.dygraph.to_variable(x)) return y.numpy() def compute_v2(x): with fluid.dygraph.guard(p): - gn = paddle.nn.GroupNorm(num_channels=2, num_groups=2) + gn = paddle.nn.GroupNorm(num_channels=6, num_groups=2) y = gn(fluid.dygraph.to_variable(x)) return y.numpy() + def test_weight_bias_false(): + with fluid.dygraph.guard(p): + gn = paddle.nn.GroupNorm( + num_channels=6, + num_groups=2, + weight_attr=False, + bias_attr=False) + x = np.random.randn(*shape).astype("float32") y1 = compute_v1(x) y2 = compute_v2(x) self.assertTrue(np.allclose(y1, y2)) + test_weight_bias_false() def test_static(self): places = [fluid.CPUPlace()] - if core.is_compiled_with_cuda() and core.op_support_gpu("layer_norm"): + if core.is_compiled_with_cuda() and core.op_support_gpu("group_norm"): places.append(fluid.CUDAPlace(0)) for p in places: exe = fluid.Executor(p) @@ -60,7 +69,7 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v1(x_np): with program_guard(Program(), Program()): - gn = fluid.dygraph.GroupNorm(channels=2, groups=2) + gn = fluid.dygraph.GroupNorm(channels=6, groups=2) x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = gn(x) exe.run(fluid.default_startup_program()) @@ -69,7 +78,7 @@ class TestDygraphGroupNormv2(unittest.TestCase): def compute_v2(x_np): with program_guard(Program(), Program()): - gn = paddle.nn.GroupNorm(num_channels=2, num_groups=2) + gn = paddle.nn.GroupNorm(num_channels=6, num_groups=2) x = fluid.data(name='x', shape=x_np.shape, dtype=x_np.dtype) y = gn(x) exe.run(fluid.default_startup_program()) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py index 74cfeab601b04d9624a5f6e48fd06c6cbf3715f8..22f16287c33f96a43361b5fe4ed5d0fe3edbb1bc 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py @@ -652,7 +652,7 @@ class TestDygraphUtils(unittest.TestCase): a_np = np.random.uniform(-2, 2, (10, 20, 30)).astype(np.float32) helper = LayerHelper(fluid.unique_name.generate("test"), act="relu") func = helper.append_activation - with fluid.dygraph.guard(): + with fluid.dygraph.guard(fluid.core.CPUPlace()): a = fluid.dygraph.to_variable(a_np) fluid.set_flags({'FLAGS_use_mkldnn': True}) try: diff --git a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py index 820206a3ce630eb92a36a154ca7cdec62de2ce34..13ca1840d0d24c73577a547f186d4f03b13bca28 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_decorator.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_decorator.py @@ -28,7 +28,7 @@ class TestTracerMode(unittest.TestCase): def get_tracer_mode(self): assert fluid.in_dygraph_mode(), "Dygraph mode must be enabled" - @paddle.no_grad() + @fluid.dygraph.no_grad def no_grad_func(self, a): self.assertEqual(self.tracer._train_mode, False) return a @@ -56,35 +56,17 @@ class TestTracerMode(unittest.TestCase): def need_no_grad_func(a, b=1): return a + b - decorated_func = paddle.no_grad()(need_no_grad_func) + decorated_func = fluid.dygraph.no_grad(need_no_grad_func) self.assertTrue( str(inspect.getargspec(decorated_func)) == str(inspect.getargspec(need_no_grad_func))) self.assertEqual(self.tracer._train_mode, self.init_mode) - def test_gen(): - for i in range(3): - yield i - - a 
= 0 - for i in test_gen(): - a += i - - @paddle.no_grad() - def test_wrapped_gen(): - for i in range(3): - yield i - - b = 0 - for i in test_wrapped_gen(): - b += i - - self.assertEqual(a, b) - with fluid.dygraph.guard(): self.check_not_support_rlt(False) + paddle.enable_static() with new_program_scope(): self.check_not_support_rlt(True) @@ -94,5 +76,48 @@ class TestTracerMode2(TestTracerMode): self.init_mode = False +class TestNoGradClass(unittest.TestCase): + @paddle.no_grad() + def no_grad_func(self, a): + self.assertEqual(self.tracer._train_mode, False) + return a + + def test_main(self): + paddle.disable_static() + + self.tracer = framework._dygraph_tracer() + self.tracer._train_mode = True + + self.assertEqual(self.no_grad_func(1), 1) + self.assertEqual(self.no_grad_func.__name__, "no_grad_func") + + def need_no_grad_func(a, b=1): + return a + b + + decorated_func = paddle.no_grad()(need_no_grad_func) + self.assertEqual( + str(inspect.getargspec(decorated_func)), + str(inspect.getargspec(need_no_grad_func))) + + def test_gen(): + for i in range(3): + yield i + + a = 0 + for i in test_gen(): + a += i + + @paddle.no_grad() + def test_wrapped_gen(): + for i in range(3): + yield i + + b = 0 + for i in test_wrapped_gen(): + b += i + + self.assertEqual(a, b) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py index 619e9e8e90783365b5f0d718783a14468520c8d4..887e50f07c55cc991d7816609253039ce0d48d7d 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_optimizer_v2.py @@ -401,9 +401,7 @@ class TestOptimizerLearningRate(unittest.TestCase): a = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") linear = fluid.dygraph.nn.Linear(10, 10) - a = fluid.dygraph.to_variable(a) - b = linear(a) loss = fluid.layers.reduce_mean(b) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py index 48aea3a584dd25667704b22d99d1074c481bb76c..22e19efcb58d19c41835565de2c8c01fe253702a 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load.py @@ -374,8 +374,7 @@ class TestDygraphPtbRnn(unittest.TestCase): adam._learning_rate.step_num = 0 para_state_dict, opti_state_dict = paddle.load("./test_dy") - print(opti_state_dict['LR_Scheduler']) - adam.set_dict(opti_state_dict) + adam.set_state_dict(opti_state_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): @@ -393,7 +392,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(para_state_dict) + ptb_model.set_state_dict(stat_dict=para_state_dict) state_dict = ptb_model.state_dict() @@ -483,7 +482,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(self.opti_dict) + adam.set_state_dict(self.opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): if isinstance(v, core.VarBase): @@ -500,7 +499,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(self.state_dict) + ptb_model.set_state_dict(self.state_dict) state_dict = ptb_model.state_dict() @@ -593,7 +592,7 @@ class TestDygraphPtbRnn(unittest.TestCase): if isinstance(adam._learning_rate, 
LearningRateDecay): adam._learning_rate.step_num = 0 - adam.set_dict(np_opti_dict) + adam.set_state_dict(np_opti_dict) opti_dict = adam.state_dict() for k, v in opti_dict.items(): @@ -613,7 +612,7 @@ class TestDygraphPtbRnn(unittest.TestCase): var.set(np.zeros_like(np_t), place) - ptb_model.set_dict(np_state_dict) + ptb_model.set_state_dict(np_state_dict) state_dict = ptb_model.state_dict() @@ -656,8 +655,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_hidden = None last_cell = None - adam.set_dict(self.opti_dict) - ptb_model.set_dict(self.state_dict) + adam.set_state_dict(self.opti_dict) + ptb_model.set_state_dict(self.state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -745,8 +744,8 @@ class TestDygraphPtbRnn(unittest.TestCase): last_cell = None state_dict, opti_dict = fluid.load_dygraph("./test_dy") - adam.set_dict(opti_dict) - ptb_model.set_dict(state_dict) + adam.set_state_dict(opti_dict) + ptb_model.set_state_dict(state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') @@ -849,8 +848,8 @@ class TestDygraphPtbRnn(unittest.TestCase): for k, v in self.state_dict.items(): np_state_dict[k] = v.numpy() - adam.set_dict(np_opti_dict) - ptb_model.set_dict(np_state_dict) + adam.set_state_dict(np_opti_dict) + ptb_model.set_state_dict(np_state_dict) for i in range(1): x_data = np.arange(12).reshape(4, 3).astype('int64') y_data = np.arange(1, 13).reshape(4, 3).astype('int64') @@ -912,6 +911,22 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_load_compatible_with_keep_name_table(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + + para_state_dict, opti_state_dict = paddle.load( + os.path.join('saved_dy', 'emb_dy'), keep_name_table=True) + self.assertTrue(para_state_dict != None) + self.assertTrue(opti_state_dict == None) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index e81d1c8610f6bebffadf930b67dc14a4a418ef05..3eb413a62664057c56567d5834b216110fac04fb 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -297,6 +297,7 @@ class TestDygraphPtbRnn(unittest.TestCase): paddle.save(self.state_dict, "./test_dy_v2") def testLoadAndSetVarBase(self): + self.setUp() seed = 90 hidden_size = 10 vocab_size = 1000 @@ -917,6 +918,29 @@ class TestDygraphPtbRnn(unittest.TestCase): para_state_dict, opti_state_dict = paddle.load( os.path.join('saved_dy', 'emb_dy.pdopt')) + def test_no_state_in_input_dict(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict.pop('weight') + + emb.set_state_dict(para_state_dict) + + def test_state_shape_mismatch(self): + with fluid.dygraph.guard(): + emb = fluid.dygraph.Embedding([10, 10]) + state_dict = emb.state_dict() + paddle.save(state_dict, 
os.path.join('saved_dy', 'emb_dy')) + + para_state_dict, _ = paddle.load(os.path.join('saved_dy', 'emb_dy')) + para_state_dict['weight'] = np.expand_dims( + para_state_dict['weight'], axis=-1) + + emb.set_state_dict(para_state_dict) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py index b02ba1a584b52dbbc99fcc8ed7bad438e7a9dd46..c45c144e3ad44c5781ea1f1d7d61028b56d8a254 100644 --- a/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_instance_norm_op_v2.py @@ -48,7 +48,13 @@ class TestInstanceNorm(unittest.TestCase): instance_norm3d = paddle.nn.BatchNorm3d(1) instance_norm3d(fluid.dygraph.to_variable(x_data_4)) + def weight_bias_false(): + x_data_4 = np.random.random(size=(2, 1, 3, 3)).astype('float32') + instance_norm3d = paddle.nn.BatchNorm3d( + 1, weight_attr=False, bias_attr=False) + with fluid.dygraph.guard(p): + weight_bias_false() self.assertRaises(ValueError, error1d) self.assertRaises(ValueError, error2d) self.assertRaises(ValueError, error3d) diff --git a/python/paddle/fluid/tests/unittests/test_jit_save_load.py b/python/paddle/fluid/tests/unittests/test_jit_save_load.py index 87b6e76a6d0ab7f5fba7c4526734d81475e1540e..f7fcc1ff561b90dc1b78a67ffbe7c047ed06d0e9 100644 --- a/python/paddle/fluid/tests/unittests/test_jit_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_jit_save_load.py @@ -183,25 +183,6 @@ class TestJitSaveLoad(unittest.TestCase): with self.assertRaises(ValueError): model_dict, _ = fluid.dygraph.load_dygraph(model_path) - def test_load_dygraph_no_var_info(self): - model_path = "model.test_jit_save_load.no_var_info" - self.train_and_save_model(model_path=model_path) - # remove `__variables.info__` - var_info_path = os.path.join(model_path, EXTRA_VAR_INFO_FILENAME) - os.remove(var_info_path) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - - def test_load_dygraph_not_var_file(self): - model_path = "model.test_jit_save_load.no_var_file" - configs = fluid.dygraph.jit.SaveLoadConfig() - configs.params_filename = "__params__" - self.train_and_save_model(model_path=model_path, configs=configs) - new_layer = LinearNet(784, 1) - with self.assertRaises(RuntimeError): - model_dict, _ = fluid.dygraph.load_dygraph(model_path) - class LinearNetMultiInput(fluid.dygraph.Layer): def __init__(self, in_size, out_size): diff --git a/python/paddle/fluid/tests/unittests/test_linspace.py b/python/paddle/fluid/tests/unittests/test_linspace.py index 6d1f42111eebff0f469317ddf2a9ec7698a7ae1e..03cb84ec99e0259a33a086c3d3e5a71abea09d2b 100644 --- a/python/paddle/fluid/tests/unittests/test_linspace.py +++ b/python/paddle/fluid/tests/unittests/test_linspace.py @@ -154,16 +154,16 @@ class TestLinspaceOpError(unittest.TestCase): self.assertRaises(TypeError, test_step_dtype) def test_start_dtype(): - start = fluid.data(shape=[1], dtype="int32", name="start") + start = fluid.data(shape=[1], dtype="float64", name="start") fluid.layers.linspace(start, 10, 1, dtype="float32") - self.assertRaises(TypeError, test_start_dtype) + self.assertRaises(ValueError, test_start_dtype) def test_end_dtype(): - end = fluid.data(shape=[1], dtype="int32", name="end") + end = fluid.data(shape=[1], dtype="float64", name="end") fluid.layers.linspace(0, end, 1, dtype="float32") - self.assertRaises(TypeError, test_end_dtype) + 
self.assertRaises(ValueError, test_end_dtype) def test_num_dtype(): num = fluid.data(shape=[1], dtype="int32", name="step") diff --git a/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py new file mode 100644 index 0000000000000000000000000000000000000000..ed1939dbe279f28883d9e33178f1cfa256140e33 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_load_state_dict_from_old_format.py @@ -0,0 +1,165 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import os +import six +import unittest +import numpy as np + +import paddle +import paddle.fluid as fluid +from paddle.fluid import core +from test_imperative_base import new_program_scope + + +def convolutional_neural_network(img): + conv_pool_1 = fluid.nets.simple_img_conv_pool( + input=img, + filter_size=5, + num_filters=20, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_1 = fluid.layers.batch_norm(conv_pool_1) + conv_pool_2 = fluid.nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=50, + pool_size=2, + pool_stride=2, + act="relu") + prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax') + return prediction + + +def static_train_net(img, label): + prediction = convolutional_neural_network(img) + + loss = fluid.layers.cross_entropy(input=prediction, label=label) + avg_loss = fluid.layers.mean(loss) + + optimizer = fluid.optimizer.SGD(learning_rate=0.001) + optimizer.minimize(avg_loss) + + return prediction, avg_loss + + +class TestLoadStateDictFromSaveInferenceModel(unittest.TestCase): + def setUp(self): + self.seed = 90 + self.epoch_num = 1 + self.batch_size = 128 + self.batch_num = 10 + + def train_and_save_model(self): + with new_program_scope(): + startup_program = fluid.default_startup_program() + main_program = fluid.default_main_program() + + img = fluid.data( + name='img', shape=[None, 1, 28, 28], dtype='float32') + label = fluid.data(name='label', shape=[None, 1], dtype='int64') + + prediction, avg_loss = static_train_net(img, label) + + place = fluid.CUDAPlace(0) if core.is_compiled_with_cuda( + ) else fluid.CPUPlace() + + exe = fluid.Executor(place) + + feeder = fluid.DataFeeder(feed_list=[img, label], place=place) + exe.run(startup_program) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=100), + batch_size=self.batch_size) + + for _ in range(0, self.epoch_num): + for batch_id, data in enumerate(train_reader()): + exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[avg_loss]) + + if batch_id > self.batch_num: + break + + static_param_dict = {} + for param in fluid.default_main_program().all_parameters(): + static_param_dict[param.name] = fluid.executor._fetch_var( + param.name) + + fluid.io.save_inference_model( + self.save_dirname, ["img"], [prediction], + exe, + model_filename=self.model_filename, + 
params_filename=self.params_filename) + + return static_param_dict + + def check_load_state_dict(self, orig_dict, load_dict): + for var_name, value in six.iteritems(orig_dict): + self.assertTrue(np.array_equal(value, load_dict[var_name])) + + def test_load_default(self): + self.save_dirname = "static_mnist.load_state_dict.default" + self.model_filename = None + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_filename" + self.model_filename = "static_mnist.model" + self.params_filename = None + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.separate_params = True + configs.model_filename = self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.param_filename" + self.model_filename = None + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + def test_load_with_model_and_param_filename(self): + self.save_dirname = "static_mnist.load_state_dict.model_and_param_filename" + self.model_filename = "static_mnist.model" + self.params_filename = "static_mnist.params" + orig_param_dict = self.train_and_save_model() + + configs = paddle.SaveLoadConfig() + configs.params_filename = self.params_filename + configs.model_filename = self.model_filename + load_param_dict, _ = paddle.load(self.save_dirname, configs) + self.check_load_state_dict(orig_param_dict, load_param_dict) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 98d8b7f9f88d2f8892bb2ac8190fbb3c9f19e047..44a653521a9c4878f6135c7f78f4e779c929e7d3 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -59,7 +59,7 @@ class TestLookupTableOpWithTensorIds(OpTest): def setUp(self): self.op_type = "lookup_table_v2" table = np.random.random((17, 31)).astype("float64") - ids = np.random.randint(low=0, high=17, size=(2, 4, 5)).astype("int64") + ids = np.random.randint(low=0, high=17, size=(2, 4, 5)).astype("int32") self.inputs = {'W': table, 'Ids': ids} self.outputs = {'Out': table[ids.flatten()].reshape((2, 4, 5, 31))} @@ -100,7 +100,7 @@ class TestLookupTableOpWithTensorIdsAndPadding(TestLookupTableOpWithTensorIds): class TestLookupTableWIsSelectedRows(unittest.TestCase): def prepare_ids(self, scope, place): ids_tensor = scope.var('Ids').get_tensor() - ids_array = np.array([0, 4, 3, 5]).astype("int64") + ids_array = np.array([0, 4, 3, 5]).astype("int32") ids_tensor.set(ids_array, place) return ids_array diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py new file mode 100644 index 
0000000000000000000000000000000000000000..e0edf9019356f38eb3c74b9cadfa6ae575e9b823 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_dygraph.py @@ -0,0 +1,36 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest + + +class EmbeddingDygraph(unittest.TestCase): + def test_1(self): + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # example 1 + inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64') + inp_word.shape # [2, 3] + dict_size = 20 + + emb = nn.Embedding(dict_size, 32, weight_attr='emb.w', sparse=False) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py new file mode 100644 index 0000000000000000000000000000000000000000..c9c91ceb39de42c44f9ce81658aa79b896999552 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
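(The new dygraph embedding test above builds the layer but stops before a forward pass; as a rough standalone sketch, not part of this diff and assuming the paddle.nn.Embedding(num_embeddings, embedding_dim, ...) signature the test already uses, the lookup it sets up would be exercised like this:)

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    # token ids for a batch of 2 sequences of length 3, as in the test above
    inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64')
    emb = nn.Embedding(20, 32, sparse=False)   # vocabulary of 20, embedding dim 32
    out = emb(paddle.to_tensor(inp_word))      # lookup; expected shape [2, 3, 32]
    print(out.shape)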
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle.fluid as fluid +import paddle.nn.functional as functional + + +class EmbeddingStatic(unittest.TestCase): + def test_1(self): + prog = fluid.Program() + with fluid.program_guard(prog): + + def test_bad_x(): + initializer = fluid.initializer.NumpyArrayInitializer( + np.random.random(size=(128, 100))) + + param_attr = fluid.ParamAttr( + name="emb_weight", + learning_rate=0.5, + initializer=initializer, + trainable=True) + + weight = prog.global_block().create_parameter( + (128, 100), attr=param_attr, dtype="float32") + + label = fluid.layers.data( + name="label", + shape=[4], + append_batch_size=False, + dtype="int64") + + emb = functional.embedding( + x=label, weight=weight, sparse=True, name="embedding") + + test_bad_x() + + def test_2(self): + prog = fluid.Program() + with fluid.program_guard(prog): + + def test_bad_x(): + initializer = fluid.initializer.NumpyArrayInitializer( + np.random.random(size=(128, 100))) + + param_attr = fluid.ParamAttr( + name="emb_weight", + learning_rate=0.5, + initializer=initializer, + trainable=True) + + weight = prog.global_block().create_parameter( + (128, 100), attr=param_attr, dtype="float32") + + label = fluid.layers.data( + name="label", + shape=[4], + append_batch_size=False, + dtype="int32") + + emb = functional.embedding( + x=label, weight=weight, sparse=True, name="embedding") + + test_bad_x() + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_normal.py b/python/paddle/fluid/tests/unittests/test_normal.py index 3e6855feaf491727203063f5c75c68301abbe05e..995a1f26ff6eb86c9198a164bcef80bebe3a8e79 100644 --- a/python/paddle/fluid/tests/unittests/test_normal.py +++ b/python/paddle/fluid/tests/unittests/test_normal.py @@ -18,6 +18,7 @@ import paddle import copy np.random.seed(10) +paddle.manual_seed(10) class TestNormalAPI(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py index 2e6e516aa2edde79e6524b4b35507ea95876ec53..91d705223316360b8c05954259724a5f7d246440 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer.py @@ -832,8 +832,8 @@ class TestRecomputeOptimizer(unittest.TestCase): recompute_optimizer = optimizer.RecomputeOptimizer(sgd_optimizer) recompute_optimizer._set_checkpoints([b1_out]) try: - stat_dict = {} - recompute_optimizer.load(stat_dict) + state_dict = {} + recompute_optimizer.load(state_dict) except NotImplementedError as e: self.assertEqual( "load function is not supported by Recompute Optimizer for now", diff --git a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py index bac196b1ab52b604a85321a5473d455d2616bf0d..9cc507aa9b7918e854d56f1c8482f1b875910fb4 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_dygraph_mnist.py @@ -47,5 +47,21 @@ class TestParallelDygraphMnistSpawn(TestDistSpawnRunner): self.check_dist_result_with_spawn(test_class=TestMnist, delta=1e-5) +class TestFleetDygraphMnist(TestDistBase): + def _setup_config(self): + self._sync_mode = False + self._nccl2_mode = True + self._dygraph = True + self._gpu_fleet_api = True + + def test_mnist(self): + if fluid.core.is_compiled_with_cuda(): + self.check_with_place( + "parallel_dygraph_mnist.py", + 
delta=1e-5, + check_error_log=True, + log_name=flag_name) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index 1c05b96f1fc61234028e940f6403ae08a0186027..25216175d59935535a352b02afc3c8f371cedd63 100644 --- a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -143,6 +143,27 @@ class TestPool1d_API(unittest.TestCase): result = avg_pool1d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_avg_dygraph_padding_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = F.avg_pool1d( + input, + kernel_size=2, + stride=2, + padding=[1], + count_include_pad=True) + + result_np = avg_pool1D_forward_naive( + input_np, ksize=[2], strides=[2], paddings=[1], exclusive=False) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool1d_dg = paddle.nn.AvgPool1d( + kernel_size=2, stride=None, padding=1, count_include_pad=True) + result = avg_pool1d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data(name="input", shape=[2, 3, 32], dtype="float32") diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py index 93a2be6de342efc4e8284e7c352137d0a3a1bcb9..91faf78418b0d3a92a3cb6a167b6024b1beb3898 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
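(A standalone sketch of the padded avg_pool1d path covered by the new check_avg_dygraph_padding_results above; illustrative only, it mirrors the functional call from the test and the usual pooling size arithmetic:)

    import numpy as np
    import paddle.fluid as fluid
    import paddle.nn.functional as F

    with fluid.dygraph.guard(fluid.CPUPlace()):
        x_np = np.random.random([2, 3, 32]).astype("float32")
        x = fluid.dygraph.to_variable(x_np)
        # kernel 2, stride 2, symmetric zero padding of 1; padded zeros count in the mean
        y = F.avg_pool1d(x, kernel_size=2, stride=2, padding=[1],
                         count_include_pad=True)
        print(y.shape)  # [2, 3, 17], since floor((32 + 2*1 - 2) / 2) + 1 = 17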
-from test_pool2d_op import adaptive_start_index, adaptive_end_index, pool2D_forward_naive +from test_pool2d_op import adaptive_start_index, adaptive_end_index, pool2D_forward_naive, avg_pool2D_forward_naive, max_pool2D_forward_naive import unittest from op_test import OpTest import numpy as np @@ -68,6 +68,47 @@ class TestPool2d_API(unittest.TestCase): result = avg_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_avg_dygraph_padding_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool2d( + input, kernel_size=2, stride=2, padding=1, ceil_mode=False) + + result_np = avg_pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[1, 1], + ceil_mode=False, + exclusive=False) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool2d_dg = paddle.nn.layer.AvgPool2d( + kernel_size=2, stride=2, padding=1, ceil_mode=False) + result = avg_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_dygraph_ceilmode_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool2d( + input, kernel_size=2, stride=2, padding=0, ceil_mode=True) + + result_np = avg_pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + ceil_mode=True) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool2d_dg = paddle.nn.layer.AvgPool2d( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + result = avg_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data( @@ -108,6 +149,70 @@ class TestPool2d_API(unittest.TestCase): result = max_pool2d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_nhwc_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable( + np.transpose(input_np, [0, 2, 3, 1])) + result = max_pool2d( + input, + kernel_size=2, + stride=2, + padding=0, + return_indices=False, + data_format="NHWC") + + result_np = pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + pool_type='max') + self.assertTrue( + np.allclose( + np.transpose(result.numpy(), [0, 3, 1, 2]), result_np)) + + def check_max_dygraph_padding_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool2d( + input, kernel_size=2, stride=2, padding=1, ceil_mode=False) + + result_np = max_pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[1, 1], + ceil_mode=False, + exclusive=False) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool2d_dg = paddle.nn.layer.MaxPool2d( + kernel_size=2, stride=2, padding=1, ceil_mode=False) + result = max_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_ceilmode_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool2d( + input, kernel_size=2, stride=2, 
padding=0, ceil_mode=True) + + result_np = max_pool2D_forward_naive( + input_np, + ksize=[2, 2], + strides=[2, 2], + paddings=[0, 0], + ceil_mode=True) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool2d_dg = paddle.nn.layer.MaxPool2d( + kernel_size=2, stride=2, padding=0, ceil_mode=True) + result = max_pool2d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_stride_is_none(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32]).astype("float32") @@ -215,6 +320,9 @@ class TestPool2d_API(unittest.TestCase): self.check_avg_dygraph_stride_is_none(place) self.check_max_dygraph_padding(place) self.check_avg_divisor(place) + self.check_max_dygraph_padding_results(place) + self.check_max_dygraph_ceilmode_results(place) + self.check_max_dygraph_nhwc_results(place) class TestPool2dError_API(unittest.TestCase): @@ -370,6 +478,22 @@ class TestPool2dError_API(unittest.TestCase): self.assertRaises(ValueError, run8) + def run9(): + with fluid.dygraph.guard(): + input_np = np.random.uniform(-1, 1, + [2, 3, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = max_pool2d( + input_pd, + kernel_size=2, + stride=2, + padding=0, + ceil_mode=False, + data_format='NHWC', + return_indices=True) + + self.assertRaises(ValueError, run9) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index cc078e9aae7aafe55e937b80270dd012fd64ff70..a77f1cdd57d7bade92e2a4f914dc3d91624d4845 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -22,7 +22,7 @@ import paddle.fluid.core as core from op_test import OpTest import paddle.fluid as fluid from paddle.nn.functional import avg_pool3d, max_pool3d -from test_pool3d_op import adaptive_start_index, adaptive_end_index, pool3D_forward_naive +from test_pool3d_op import adaptive_start_index, adaptive_end_index, pool3D_forward_naive, avg_pool3D_forward_naive, max_pool3D_forward_naive class TestPool3d_API(unittest.TestCase): @@ -73,6 +73,58 @@ class TestPool3d_API(unittest.TestCase): result = avg_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_avg_dygraph_padding_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool3d( + input, + kernel_size=2, + stride=2, + padding=1, + ceil_mode=False, + count_include_pad=True) + + result_np = avg_pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[1, 1, 1], + ceil_mode=False, + exclusive=False) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool3d_dg = paddle.nn.layer.AvgPool3d( + kernel_size=2, + stride=None, + padding=1, + ceil_mode=False, + count_include_pad=True) + result = avg_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_avg_dygraph_ceilmode_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = avg_pool3d( + input, kernel_size=2, stride=2, padding=0, ceil_mode=True) + + result_np = avg_pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + ceil_mode=True) + + 
self.assertTrue(np.allclose(result.numpy(), result_np)) + + avg_pool3d_dg = paddle.nn.layer.AvgPool3d( + kernel_size=2, stride=None, padding=0, ceil_mode=True) + result = avg_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_static_results(self, place): with fluid.program_guard(fluid.Program(), fluid.Program()): input = fluid.data( @@ -112,6 +164,74 @@ class TestPool3d_API(unittest.TestCase): result = max_pool3d_dg(input) self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_ndhwc_results(self, place): + print("run ndchw max pool3d") + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable( + np.transpose(input_np, [0, 2, 3, 4, 1])) + result = max_pool3d( + input, + kernel_size=2, + stride=2, + padding=0, + data_format="NDHWC", + return_indices=False) + + result_np = pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + pool_type='max') + + self.assertTrue( + np.allclose( + np.transpose(result.numpy(), [0, 4, 1, 2, 3]), result_np)) + + def check_max_dygraph_ceilmode_results(self, place): + print("run ceil mode max pool3d") + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool3d( + input, kernel_size=2, stride=2, padding=0, ceil_mode=True) + + result_np = max_pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[0, 0, 0], + ceil_mode=True) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool3d_dg = paddle.nn.layer.MaxPool3d( + kernel_size=2, stride=None, padding=0, ceil_mode=True) + result = max_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + + def check_max_dygraph_padding_results(self, place): + with fluid.dygraph.guard(place): + input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") + input = fluid.dygraph.to_variable(input_np) + result = max_pool3d( + input, kernel_size=2, stride=2, padding=1, ceil_mode=False) + + result_np = max_pool3D_forward_naive( + input_np, + ksize=[2, 2, 2], + strides=[2, 2, 2], + paddings=[1, 1, 1], + ceil_mode=False) + + self.assertTrue(np.allclose(result.numpy(), result_np)) + + max_pool3d_dg = paddle.nn.layer.MaxPool3d( + kernel_size=2, stride=None, padding=1, ceil_mode=False) + result = max_pool3d_dg(input) + self.assertTrue(np.allclose(result.numpy(), result_np)) + def check_max_dygraph_stride_is_none(self, place): with fluid.dygraph.guard(place): input_np = np.random.random([2, 3, 32, 32, 32]).astype("float32") @@ -205,6 +325,8 @@ class TestPool3d_API(unittest.TestCase): self.check_max_dygraph_stride_is_none(place) self.check_max_dygraph_padding(place) self.check_avg_divisor(place) + self.check_max_dygraph_ndhwc_results(place) + self.check_max_dygraph_ceilmode_results(place) class TestPool3dError_API(unittest.TestCase): @@ -336,6 +458,21 @@ class TestPool3dError_API(unittest.TestCase): self.assertRaises(ValueError, run9) + def run10(): + with fluid.dygraph.guard(): + input_np = np.random.uniform( + -1, 1, [2, 3, 32, 32, 32]).astype(np.float32) + input_pd = fluid.dygraph.to_variable(input_np) + res_pd = max_pool3d( + input_pd, + kernel_size=2, + stride=2, + padding=0, + data_format='NDHWC', + return_indices=True) + + self.assertRaises(ValueError, run10) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_rand_op.py 
b/python/paddle/fluid/tests/unittests/test_rand_op.py index 1eceeaadfec651ade5031ddc7e6a012244050e84..4b8fe8c7e4786417de2f80dbb9953530781f9189 100644 --- a/python/paddle/fluid/tests/unittests/test_rand_op.py +++ b/python/paddle/fluid/tests/unittests/test_rand_op.py @@ -120,24 +120,24 @@ class TestRandDtype(unittest.TestCase): def test_default_dtype(self): paddle.disable_static() - def test_default_fp_16(): + def test_default_fp16(): paddle.framework.set_default_dtype('float16') paddle.tensor.random.rand([2, 3]) - self.assertRaises(TypeError, test_default_fp_16) + self.assertRaises(TypeError, test_default_fp16) - def test_default_fp_32(): + def test_default_fp32(): paddle.framework.set_default_dtype('float32') out = paddle.tensor.random.rand([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32) - def test_default_fp_64(): + def test_default_fp64(): paddle.framework.set_default_dtype('float64') out = paddle.tensor.random.rand([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) - test_default_fp_64() - test_default_fp_32() + test_default_fp64() + test_default_fp32() paddle.enable_static() diff --git a/python/paddle/fluid/tests/unittests/test_randint_op.py b/python/paddle/fluid/tests/unittests/test_randint_op.py index 88b07f5df83f8f967f8ba76e78b37ecfb2c54276..7880b48cd7d5a006d78b836be3d9d2f0b1e04c5e 100644 --- a/python/paddle/fluid/tests/unittests/test_randint_op.py +++ b/python/paddle/fluid/tests/unittests/test_randint_op.py @@ -58,6 +58,11 @@ class TestRandintOpError(unittest.TestCase): self.assertRaises(TypeError, paddle.randint, 5, dtype='float32') self.assertRaises(ValueError, paddle.randint, 5, 5) self.assertRaises(ValueError, paddle.randint, -5) + self.assertRaises(TypeError, paddle.randint, 5, shape=['2']) + shape_tensor = paddle.static.data('X', [1]) + self.assertRaises(TypeError, paddle.randint, 5, shape=shape_tensor) + self.assertRaises( + TypeError, paddle.randint, 5, shape=[shape_tensor]) class TestRandintOp_attr_tensorlist(OpTest): diff --git a/python/paddle/fluid/tests/unittests/test_reduce_op.py b/python/paddle/fluid/tests/unittests/test_reduce_op.py index cf35f9dbcdaaae1357ccdfd6b5cba85ac98d2037..b0b85f633a2bf613cdbdcc2ba7b31b5d970da8ca 100644 --- a/python/paddle/fluid/tests/unittests/test_reduce_op.py +++ b/python/paddle/fluid/tests/unittests/test_reduce_op.py @@ -475,87 +475,71 @@ class API_TestSumOpError(unittest.TestCase): def test_errors(self): def test_dtype1(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float32") - paddle.sum(data, dtype="int32") + data = fluid.data(name="data", shape=[10], dtype="float64") + paddle.sum(data, dtype="float32") self.assertRaises(ValueError, test_dtype1) def test_dtype2(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="float32") - paddle.sum(data, dtype="float32") + data = fluid.data(name="data", shape=[10], dtype="int64") + paddle.sum(data, dtype="int32") self.assertRaises(ValueError, test_dtype2) def test_dtype3(): with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data(name="data", shape=[10], dtype="int32") - paddle.sum(data, dtype="bool") + data = fluid.data(name="data", shape=[10], dtype="float64") + paddle.sum(data, dtype="int32") self.assertRaises(ValueError, test_dtype3) - def test_dtype4(): + def test_type(): with fluid.program_guard(fluid.Program(), fluid.Program()): data = fluid.data(name="data", shape=[10], dtype="int32") - paddle.sum(data, 
dtype="int32") + paddle.sum(data, dtype="bool") - self.assertRaises(ValueError, test_dtype3) + self.assertRaises(TypeError, test_type) class API_TestSumOp(unittest.TestCase): - def test_static(self): - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data("data", shape=[10, 10], dtype="float32") - result_sum = paddle.sum(x=data, axis=1, dtype="float64") - place = fluid.CPUPlace() - exe = fluid.Executor(place) - input_data = np.random.rand(10, 10).astype(np.float32) - res, = exe.run(feed={"data": input_data}, fetch_list=[result_sum]) - self.assertEqual( - (res == np.sum(input_data.astype(np.float64), axis=1)).all(), True) + def run_static(self, + shape, + x_dtype, + attr_axis, + attr_dtype=None, + np_axis=None): + if np_axis is None: + np_axis = attr_axis with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(x=data, axis=1, dtype="int64") - place = fluid.CPUPlace() - exe = fluid.Executor(place) - input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) - res, = exe.run(feed={"data": input_data}, fetch_list=[result_sum]) - self.assertEqual( - (res == np.sum(input_data.astype(np.int64), axis=1)).all(), True) + data = fluid.data("data", shape=shape, dtype=x_dtype) + result_sum = paddle.sum(x=data, axis=attr_axis, dtype=attr_dtype) - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(x=data, axis=1) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) + exe = fluid.Executor(fluid.CPUPlace()) + input_data = np.random.rand(*shape).astype(x_dtype) res, = exe.run(feed={"data": input_data}, fetch_list=[result_sum]) - self.assertEqual((res == np.sum(input_data, axis=1)).all(), True) - with fluid.program_guard(fluid.Program(), fluid.Program()): - data = fluid.data("data", shape=[10, 10], dtype="int32") - result_sum = paddle.sum(x=data, axis=1) - place = fluid.CPUPlace() - exe = fluid.Executor(place) - input_data = np.random.randint(10, size=(10, 10)).astype(np.int32) - res, = exe.run(feed={"data": input_data}, fetch_list=[result_sum]) - self.assertEqual((res == np.sum(input_data, axis=1)).all(), True) + self.assertTrue( + np.allclose( + res, np.sum(input_data.astype(attr_dtype), axis=np_axis))) - with fluid.program_guard(fluid.Program(), fluid.Program()): - input_data = np.random.randint(10, size=(5, 5, 5)).astype(np.int32) - data = fluid.data("data", shape=[5, 5, 5], dtype="int32") - sum1 = paddle.sum(x=data, axis=[0, 1]) - sum2 = paddle.sum(x=data, axis=()) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - res1, res2 = exe.run(feed={"data": input_data}, - fetch_list=[sum1, sum2]) - - self.assertEqual((res1 == np.sum(input_data, axis=(0, 1))).all(), True) - self.assertEqual( - (res2 == np.sum(input_data, axis=(0, 1, 2))).all(), True) + def test_static(self): + shape = [10, 10] + axis = 1 + + self.run_static(shape, "int32", axis, attr_dtype=None) + self.run_static(shape, "int32", axis, attr_dtype="int32") + self.run_static(shape, "int32", axis, attr_dtype="int64") + + self.run_static(shape, "float32", axis, attr_dtype=None) + self.run_static(shape, "float32", axis, attr_dtype="float32") + self.run_static(shape, "float32", axis, attr_dtype="float64") + + shape = [5, 5, 5] + self.run_static(shape, "int32", (0, 1), attr_dtype="int32") + self.run_static( + shape, "int32", (), attr_dtype="int32", 
np_axis=(0, 1, 2)) def test_dygraph(self): np_x = np.random.random([2, 3, 4]).astype('int32') diff --git a/python/paddle/fluid/tests/unittests/test_regularizer.py b/python/paddle/fluid/tests/unittests/test_regularizer.py index 44087c5421a5ee66273ef35b935926d42dcc37ae..167a8a017c24a01a6475a03835222d33c601396e 100644 --- a/python/paddle/fluid/tests/unittests/test_regularizer.py +++ b/python/paddle/fluid/tests/unittests/test_regularizer.py @@ -106,9 +106,9 @@ def bow_net(data, label, dict_dim, is_sparse=False, - emb_dim=128, - hid_dim=128, - hid_dim2=96, + emb_dim=8, + hid_dim=8, + hid_dim2=6, class_dim=2): """ BOW net @@ -132,8 +132,8 @@ class TestRegularizer(unittest.TestCase): def setUp(self): self.word_dict = paddle.dataset.imdb.word_dict() reader = paddle.batch( - paddle.dataset.imdb.train(self.word_dict), batch_size=8)() - self.train_data = [next(reader) for _ in range(5)] + paddle.dataset.imdb.train(self.word_dict), batch_size=1)() + self.train_data = [next(reader) for _ in range(1)] def get_places(self): places = [core.CPUPlace()] @@ -245,14 +245,14 @@ class TestRegularizer(unittest.TestCase): sgd.minimize(loss) with fluid.dygraph.guard(): input = fluid.dygraph.to_variable( - np.random.randn(3, 5).astype('float32')) + np.random.randn(3, 2).astype('float32')) paddle.manual_seed(1) paddle.framework.random._manual_program_seed(1) linear1 = fluid.dygraph.Linear( - 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) linear2 = fluid.dygraph.Linear( - 5, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) + 2, 2, param_attr=fc_param_attr, bias_attr=fc_param_attr) loss1 = linear1(input) loss1.backward() diff --git a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py index f7b9d4214d36a422a3ec94dc410e58c6c827ef4c..ddac7f6b98b19d204d20ccdff75c6d4fcae50d4d 100644 --- a/python/paddle/fluid/tests/unittests/test_rmsprop_op.py +++ b/python/paddle/fluid/tests/unittests/test_rmsprop_op.py @@ -276,6 +276,19 @@ class TestRMSPropV2(unittest.TestCase): learning_rate=0.1, momentum=None) + def test_rmsprop_op_invalid_input(self): + paddle.disable_static() + linear = paddle.nn.Linear(10, 10) + with self.assertRaises(ValueError): + adam = paddle.optimizer.RMSProp( + 0.1, epsilon=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.RMSProp( + 0.1, momentum=-1, parameters=linear.parameters()) + with self.assertRaises(ValueError): + adam = paddle.optimizer.RMSProp( + 0.1, rho=-1, parameters=linear.parameters()) + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index fd5c02c55db4c22d9edd604b7998a5405961d596..8dd71c5a558094ce6f259105eeb1aafb834ad6dc 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -182,6 +182,11 @@ class API_test(unittest.TestCase): expected_result = np.stack([input1, input2, input3], axis=0) self.assertTrue(np.allclose(expected_result, result)) + def test_single_tensor_error(self): + with fluid.program_guard(fluid.Program(), fluid.Program()): + x = paddle.rand([2, 3]) + self.assertRaises(TypeError, paddle.stack, x) + class API_DygraphTest(unittest.TestCase): def test_out(self): @@ -192,18 +197,23 @@ class API_DygraphTest(unittest.TestCase): x1 = fluid.dygraph.to_variable(data1) x2 = fluid.dygraph.to_variable(data2) x3 = 
fluid.dygraph.to_variable(data3) - result = paddle.stack([x1, x2, x3], axis=0) + result = paddle.stack([x1, x2, x3]) result_np = result.numpy() - expected_result = np.stack([data1, data2, data3], axis=0) + expected_result = np.stack([data1, data2, data3]) self.assertTrue(np.allclose(expected_result, result_np)) with fluid.dygraph.guard(): y1 = fluid.dygraph.to_variable(data1) - result = paddle.stack(y1, axis=0) + result = paddle.stack([y1], axis=0) result_np_2 = result.numpy() - expected_result_2 = np.stack(data1, axis=0) + expected_result_2 = np.stack([data1], axis=0) self.assertTrue(np.allclose(expected_result_2, result_np_2)) + def test_single_tensor_error(self): + with fluid.dygraph.guard(): + x = paddle.to_tensor([1, 2, 3]) + self.assertRaises(Exception, paddle.stack, x) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_translated_layer.py b/python/paddle/fluid/tests/unittests/test_translated_layer.py new file mode 100644 index 0000000000000000000000000000000000000000..20c51b9afbafac9ba1fa032aea446383bc2b9796 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_translated_layer.py @@ -0,0 +1,157 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
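(A quick sketch of the stack behaviour the new single-tensor checks above pin down; illustrative, using the same calls the tests make:)

    import numpy as np
    import paddle
    import paddle.fluid as fluid

    data = np.random.random([1, 2]).astype("float32")
    with fluid.dygraph.guard():
        x = fluid.dygraph.to_variable(data)
        out = paddle.stack([x], axis=0)   # inputs are passed as a list; result shape [1, 1, 2]
        # passing the bare tensor instead of a list raises, as the new
        # test_single_tensor_error cases assert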
+ +from __future__ import print_function + +import unittest +import numpy as np +import paddle +import paddle.nn as nn +import paddle.optimizer as opt + +BATCH_SIZE = 16 +BATCH_NUM = 4 +EPOCH_NUM = 4 +SEED = 10 + +IMAGE_SIZE = 784 +CLASS_NUM = 10 + + +# define a random dataset +class RandomDataset(paddle.io.Dataset): + def __init__(self, num_samples): + self.num_samples = num_samples + + def __getitem__(self, idx): + np.random.seed(SEED) + image = np.random.random([IMAGE_SIZE]).astype('float32') + label = np.random.randint(0, CLASS_NUM - 1, (1, )).astype('int64') + return image, label + + def __len__(self): + return self.num_samples + + +class LinearNet(nn.Layer): + def __init__(self): + super(LinearNet, self).__init__() + self._linear = nn.Linear(IMAGE_SIZE, CLASS_NUM) + + @paddle.jit.to_static + def forward(self, x): + return self._linear(x) + + +def train(layer, loader, loss_fn, opt): + for epoch_id in range(EPOCH_NUM): + for batch_id, (image, label) in enumerate(loader()): + out = layer(image) + loss = loss_fn(out, label) + loss.backward() + opt.step() + opt.clear_grad() + print("Epoch {} batch {}: loss = {}".format(epoch_id, batch_id, + np.mean(loss.numpy()))) + return loss + + +class TestTranslatedLayer(unittest.TestCase): + def setUp(self): + # enable dygraph mode + place = paddle.CPUPlace() + paddle.disable_static(place) + + # config seed + paddle.manual_seed(SEED) + paddle.framework.random._manual_program_seed(SEED) + + # create network + self.layer = LinearNet() + self.loss_fn = nn.CrossEntropyLoss() + self.sgd = opt.SGD(learning_rate=0.001, + parameters=self.layer.parameters()) + + # create data loader + dataset = RandomDataset(BATCH_NUM * BATCH_SIZE) + self.loader = paddle.io.DataLoader( + dataset, + places=place, + batch_size=BATCH_SIZE, + shuffle=True, + drop_last=True, + num_workers=2) + + # train + train(self.layer, self.loader, self.loss_fn, self.sgd) + + # save + self.model_path = "linear.example.model" + paddle.jit.save(self.layer, self.model_path) + + def test_inference_and_fine_tuning(self): + self.load_and_inference() + self.load_and_fine_tuning() + + def load_and_inference(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + # inference + x = paddle.randn([1, IMAGE_SIZE], 'float32') + + self.layer.eval() + orig_pred = self.layer(x) + + translated_layer.eval() + pred = translated_layer(x) + + self.assertTrue(np.array_equal(orig_pred.numpy(), pred.numpy())) + + def load_and_fine_tuning(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + # train original layer continue + self.layer.train() + orig_loss = train(self.layer, self.loader, self.loss_fn, self.sgd) + + # fine-tuning + translated_layer.train() + sgd = opt.SGD(learning_rate=0.001, + parameters=translated_layer.parameters()) + loss = train(translated_layer, self.loader, self.loss_fn, sgd) + + self.assertTrue( + np.array_equal(orig_loss.numpy(), loss.numpy()), + msg="original loss:\n{}\nnew loss:\n{}\n".format(orig_loss.numpy(), + loss.numpy())) + + def test_get_program(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + program = translated_layer.program() + self.assertTrue(isinstance(program, paddle.static.Program)) + + def test_get_program_method_not_exists(self): + # load + translated_layer = paddle.jit.load(self.model_path) + + with self.assertRaises(ValueError): + program = translated_layer.program('not_exists') + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py 
b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index 56dc27a9a5b136829ce410b50998e23b77510665..5ecf25c53b794f07e298b986eff5700698b8bff7 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -239,12 +239,12 @@ class TestUniformRandomOpSelectedRows(unittest.TestCase): op = Operator( "uniform_random", Out="X", - shape=[100, 784], + shape=[1000, 784], min=-5.0, max=10.0, seed=10) op.run(scope, place) - self.assertEqual(out.get_tensor().shape(), [100, 784]) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) self.assertTrue( np.allclose( @@ -260,15 +260,15 @@ class TestUniformRandomOpSelectedRowsWithDiagInit( op = Operator( "uniform_random", Out="X", - shape=[100, 784], + shape=[500, 784], min=-5.0, max=10.0, seed=10, - diag_num=100, + diag_num=500, diag_step=784, diag_val=1.0) op.run(scope, place) - self.assertEqual(out.get_tensor().shape(), [100, 784]) + self.assertEqual(out.get_tensor().shape(), [500, 784]) hist, prob = output_hist_diag(np.array(out.get_tensor())) self.assertTrue( np.allclose( @@ -391,7 +391,7 @@ class TestUniformRandomOpSelectedRowsShapeTensor(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() shape_tensor = scope.var("Shape").get_tensor() - shape_tensor.set(np.array([100, 784]).astype("int64"), place) + shape_tensor.set(np.array([1000, 784]).astype("int64"), place) paddle.manual_seed(10) op = Operator( "uniform_random", @@ -401,7 +401,7 @@ class TestUniformRandomOpSelectedRowsShapeTensor(unittest.TestCase): max=10.0, seed=10) op.run(scope, place) - self.assertEqual(out.get_tensor().shape(), [100, 784]) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) self.assertTrue( np.allclose( @@ -423,7 +423,7 @@ class TestUniformRandomOpSelectedRowsShapeTensorList(unittest.TestCase): scope = core.Scope() out = scope.var("X").get_selected_rows() shape_1 = scope.var("shape1").get_tensor() - shape_1.set(np.array([100]).astype("int64"), place) + shape_1.set(np.array([1000]).astype("int64"), place) shape_2 = scope.var("shape2").get_tensor() shape_2.set(np.array([784]).astype("int64"), place) paddle.manual_seed(10) @@ -435,7 +435,7 @@ class TestUniformRandomOpSelectedRowsShapeTensorList(unittest.TestCase): max=10.0, seed=10) op.run(scope, place) - self.assertEqual(out.get_tensor().shape(), [100, 784]) + self.assertEqual(out.get_tensor().shape(), [1000, 784]) hist, prob = output_hist(np.array(out.get_tensor())) self.assertTrue( np.allclose( @@ -540,24 +540,24 @@ class TestUniformDtype(unittest.TestCase): def test_default_dtype(self): paddle.disable_static() - def test_default_fp_16(): + def test_default_fp16(): paddle.framework.set_default_dtype('float16') paddle.tensor.random.uniform([2, 3]) - self.assertRaises(TypeError, test_default_fp_16) + self.assertRaises(TypeError, test_default_fp16) - def test_default_fp_32(): + def test_default_fp32(): paddle.framework.set_default_dtype('float32') out = paddle.tensor.random.uniform([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP32) - def test_default_fp_64(): + def test_default_fp64(): paddle.framework.set_default_dtype('float64') out = paddle.tensor.random.uniform([2, 3]) self.assertEqual(out.dtype, fluid.core.VarDesc.VarType.FP64) - test_default_fp_64() - test_default_fp_32() + test_default_fp64() + test_default_fp32() paddle.enable_static() diff --git 
a/python/paddle/fluid/tests/unittests/test_var_base.py b/python/paddle/fluid/tests/unittests/test_var_base.py index c8383bb950d3ed7b2cdfafa185b0ad156bf7c7bf..deb49a3ffc2b5febf97680bc652e9695fb253373 100644 --- a/python/paddle/fluid/tests/unittests/test_var_base.py +++ b/python/paddle/fluid/tests/unittests/test_var_base.py @@ -33,16 +33,28 @@ class TestVarBase(unittest.TestCase): def _test_place(place): with fluid.dygraph.guard(): paddle.set_default_dtype('float32') + # set_default_dtype should not take effect on int x = paddle.to_tensor(1, place=place, stop_gradient=False) self.assertTrue(np.array_equal(x.numpy(), [1])) self.assertNotEqual(x.dtype, core.VarDesc.VarType.FP32) + # set_default_dtype should not take effect on numpy + x = paddle.to_tensor( + np.array([1.2]).astype('float16'), + place=place, + stop_gradient=False) + self.assertTrue( + np.array_equal(x.numpy(), np.array([1.2], 'float16'))) + self.assertEqual(x.dtype, core.VarDesc.VarType.FP16) + + # set_default_dtype take effect on float x = paddle.to_tensor(1.2, place=place, stop_gradient=False) self.assertTrue( np.array_equal(x.numpy(), np.array([1.2]).astype( 'float32'))) self.assertEqual(x.dtype, core.VarDesc.VarType.FP32) + # set_default_dtype take effect on complex x = paddle.to_tensor(1 + 2j, place=place, stop_gradient=False) self.assertTrue(np.array_equal(x.numpy(), [1 + 2j])) self.assertEqual(x.dtype, 'complex64') diff --git a/python/paddle/framework/__init__.py b/python/paddle/framework/__init__.py index af788874191335ad31d1540bcc0db90cc12383c6..f33e4e0fca8727574bcd1970e26c6eaee2139a05 100644 --- a/python/paddle/framework/__init__.py +++ b/python/paddle/framework/__init__.py @@ -20,8 +20,8 @@ __all__ = [ ] __all__ += [ - 'grad', 'LayerList', 'load', 'save', 'to_variable', 'no_grad', - 'DataParallel' + 'grad', 'LayerList', 'load', 'save', 'SaveLoadConfig', 'to_variable', + 'no_grad', 'DataParallel' ] __all__ += [ @@ -50,6 +50,7 @@ from ..fluid.dygraph.base import to_variable #DEFINE_ALIAS from ..fluid.dygraph.base import grad #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import load_dygraph as load #DEFINE_ALIAS from ..fluid.dygraph.checkpoint import save_dygraph as save #DEFINE_ALIAS +from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.parallel import DataParallel #DEFINE_ALIAS from ..fluid.dygraph.learning_rate_scheduler import NoamDecay #DEFINE_ALIAS diff --git a/python/paddle/framework/random.py b/python/paddle/framework/random.py index 2555d24464112ed8446d863dc8e65cfa37680b36..ba2cf603d4a69f118320e40f1f953cb4c5fcfb39 100644 --- a/python/paddle/framework/random.py +++ b/python/paddle/framework/random.py @@ -16,7 +16,7 @@ import paddle.fluid as fluid from paddle.fluid import core -__all__ = ['manual_seed'] +__all__ = ['manual_seed', 'get_cuda_rng_state', 'set_cuda_rng_state'] def manual_seed(seed): @@ -42,10 +42,69 @@ def manual_seed(seed): seed = int(seed) + if core.is_compiled_with_cuda(): + for i in range(core.get_cuda_device_count()): + core.default_cuda_generator(i)._is_init_py = True + core.default_cuda_generator(i).manual_seed(seed) + core.default_cpu_generator()._is_init_py = True return core.default_cpu_generator().manual_seed(seed) +def get_cuda_rng_state(): + """ + + Get random state of cuda generators. + + Args: + None + + Returns: + GeneratorState: object. + + Examples: + .. 
code-block:: python + + import paddle + sts = paddle.get_cuda_rng_state() + + """ + state_list = [] + if core.is_compiled_with_cuda(): + for i in range(core.get_cuda_device_count()): + state_list.append(core.default_cuda_generator(i).get_state()) + + return state_list + + +def set_cuda_rng_state(state_list): + """ + + Sets generator state for all cuda generators + + Args: + state_list(list): The cuda states to set back to cuda generators. state_list is obtained from get_cuda_rng_state(). + + Returns: + None + + Examples: + .. code-block:: python + + import paddle + sts = paddle.get_cuda_rng_state() + paddle.set_cuda_rng_state(sts) + + """ + if core.is_compiled_with_cuda(): + if not len(state_list) == core.get_cuda_device_count(): + raise ValueError( + "Length of cuda state list shoule be equal to the cuda device count" + ) + for i in range(core.get_cuda_device_count()): + core.default_cuda_generator(i).set_state(state_list[i]) + + def _manual_program_seed(seed): """ Sets global seed for generating random numbers. diff --git a/python/paddle/hapi/__init__.py b/python/paddle/hapi/__init__.py index 87f5a82525cdfa36e48d40c6d12488d359fe99db..67965de5d97621e188acfa1e0384325b9ec5b7aa 100644 --- a/python/paddle/hapi/__init__.py +++ b/python/paddle/hapi/__init__.py @@ -14,14 +14,12 @@ from . import logger from . import callbacks +from . import model_summary from . import model from .model import * - -from .dygraph_layer_patch import monkey_patch_layer +from .model_summary import summary logger.setup_logger() -__all__ = ['callbacks'] + model.__all__ - -monkey_patch_layer() +__all__ = ['callbacks'] + model.__all__ + ['summary'] diff --git a/python/paddle/hapi/dygraph_layer_patch.py b/python/paddle/hapi/dygraph_layer_patch.py deleted file mode 100644 index e3a2948b69305fcb08c14c850f5738ac46aea2be..0000000000000000000000000000000000000000 --- a/python/paddle/hapi/dygraph_layer_patch.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import warnings - -import paddle.fluid as fluid -from paddle.fluid.framework import in_dygraph_mode -from paddle.fluid.framework import _current_expected_place as _get_device - - -def monkey_patch_layer(): - def load_dict(self, - stat_dict, - include_sublayers=True, - use_structured_name=True): - ''' - Set parameters from stat_dict. All the parameters will be reset by the - tensor in the stat_dict - - This api will be Deprecated. Please use set_dict - - Parameters: - state_dict(dict) : Dict contains all the parameters - include_sublayers(bool, optional) : If true, also include the - parameters from sublayers. Default: True - use_structured_name(bool, optional) : If true, use structured name - as key, otherwise, use parameter name as key. Default: True - Returns: - None - - Examples: - .. 
code-block:: python - - import paddle.fluid as fluid - with fluid.dygraph.guard(): - emb = fluid.dygraph.Embedding([10, 10]) - - state_dict = emb.state_dict() - fluid.save_dygraph( state_dict, "paddle_dy") - - para_state_dict, _ = fluid.load_dygraph( "paddle_dy") - emb.load_dict( para_state_dict ) - - ''' - - def _check_match(key, param): - state = stat_dict.get(key, None) - if state is None: - raise ValueError( - "{} is not found in the providing file.".format(key)) - if list(state.shape) != list(param.shape): - raise ValueError( - "{} receives a shape {}, but the expected shape is {}.". - format(key, list(state.shape), list(param.shape))) - return param, state - - matched_param_state = [] - for key, param in self.state_dict().items(): - key_name = key if use_structured_name else param.name - try: - match_res = _check_match(key_name, param) - matched_param_state.append(match_res) - except ValueError as err: - warnings.warn(("Skip loading for {}. ".format(key) + str(err))) - - if in_dygraph_mode(): - for param, state in matched_param_state: - param.set_value(state) - else: - - def _set_var(var, ndarray): - t = fluid.global_scope().find_var(var.name).get_tensor() - p = t._place() - if p.is_cpu_place(): - place = fluid.CPUPlace() - elif p.is_cuda_pinned_place(): - place = fluid.CUDAPinnedPlace() - else: - p = fluid.core.Place() - p.set_place(t._place()) - place = fluid.CUDAPlace(p.gpu_device_id()) - t.set(ndarray, place) - - executor = fluid.Executor(_get_device())._default_executor - # restore parameter states - fluid.core._create_loaded_parameter( - [param for param, state in matched_param_state], - fluid.global_scope(), executor) - for param, state in matched_param_state: - _set_var(param, state) - - setattr(fluid.dygraph.Layer, 'load_dict', load_dict) diff --git a/python/paddle/hapi/model.py b/python/paddle/hapi/model.py index 5aa689ca324c099f239a29e2ee21b8283e378341..2836a151ec35698a31f3814d573828853349a151 100644 --- a/python/paddle/hapi/model.py +++ b/python/paddle/hapi/model.py @@ -47,10 +47,10 @@ from paddle.io import DataLoader, Dataset, DistributedBatchSampler from paddle.fluid.executor import scope_guard, Executor from paddle.fluid.dygraph.layers import Layer from paddle.metric import Metric - from paddle.static import InputSpec as Input from .callbacks import config_callbacks +from .model_summary import summary __all__ = ['Model', ] @@ -731,8 +731,8 @@ class DynamicGraphAdapter(object): if not self.model._optimizer or not optim_state: return - # If optimizer performs set_dict when state vars haven't been created, - # which would happen when set_dict before minimize, the state would be + # If optimizer performs set_state_dict when state vars haven't been created, + # which would happen when set_state_dict before minimize, the state would be # stored in optimizer._accumulators_holder and loaded lazily. # To contrive this when loading from static-graph saved states, extend # state dict to include keys named accoring to dygraph naming rules. 
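The hunk below makes the adapter prefer the Paddle 2.0 `set_state_dict` API and only fall back to the deprecated `set_dict`. A minimal standalone sketch of that fallback pattern (the `optimizer` and `converted_state` names are assumed here, mirroring the adapter code):

    import warnings

    def restore_optimizer_state(optimizer, converted_state):
        # Prefer the 2.0 API name; otherwise warn and fall back to the
        # deprecated paddle.fluid.optimizer method.
        if hasattr(optimizer, 'set_state_dict'):
            optimizer.set_state_dict(converted_state)
        else:
            warnings.warn(
                "paddle.fluid.optimizer is deprecated in API 2.0, "
                "please use paddle.optimizer instead")
            optimizer.set_dict(converted_state)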
@@ -776,7 +776,13 @@ class DynamicGraphAdapter(object): accum_name + "_0") converted_state[dy_state_name] = state_var - self.model._optimizer.set_dict(converted_state) + if not hasattr(self.model._optimizer, 'set_state_dict'): + warnings.warn( + "paddle.fluid.optimizer is deprecated in API 2.0, please use paddle.optimizer instead" + ) + self.model._optimizer.set_dict(converted_state) + else: + self.model._optimizer.set_state_dict(converted_state) class Model(object): @@ -1822,6 +1828,54 @@ class Model(object): return logs, outputs return logs + def summary(self, input_size=None, batch_size=None, dtype=None): + """Prints a string summary of the network. + + Args: + input_size (tuple|InputSpec|list[tuple|InputSpec], optional): size of input tensor. + if not set, input_size will get from ``self._inputs`` if network only have + one input, input_size can be tuple or InputSpec. if model have multiple + input, input_size must be a list which contain every input's shape. + Default: None. + batch_size (int, optional): batch size of input tensor, Default: None. + dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None. + + Returns: + Dict: a summary of the network including total params and total trainable params. + + Examples: + .. code-block:: python + + import paddle + from paddle.static import InputSpec + + dynamic = True + device = paddle.set_device('cpu') + paddle.disable_static(device) if dynamic else None + + input = InputSpec([None, 1, 28, 28], 'float32', 'image') + label = InputSpec([None, 1], 'int64', 'label') + + model = paddle.Model(paddle.vision.LeNet(classifier_activation=None), + input, label) + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=model.parameters()) + model.prepare( + optim, + paddle.nn.CrossEntropyLoss()) + + params_info = model.summary() + print(params_info) + + """ + assert (input_size is not None or self._inputs is not None + ), "'input_size' or 'self._input' must be set" + if input_size is not None: + _input_size = input_size + else: + _input_size = self._inputs + return summary(self.network, _input_size, batch_size, dtype) + def _verify_spec(self, specs, is_input=False): out_specs = [] diff --git a/python/paddle/hapi/model_summary.py b/python/paddle/hapi/model_summary.py new file mode 100644 index 0000000000000000000000000000000000000000..716be1b539809ea3f90885b512f51ac45d85cd37 --- /dev/null +++ b/python/paddle/hapi/model_summary.py @@ -0,0 +1,229 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +import paddle +import paddle.nn as nn +from paddle.static import InputSpec + +from collections import OrderedDict + +__all__ = ['summary'] + + +def summary(net, input_size, batch_size=None, dtypes=None): + """Prints a string summary of the network. + + Args: + net (Layer): the network which must be a subinstance of Layer. + input_size (tuple|InputSpec|list[tuple|InputSpec]): size of input tensor. 
if model only + have one input, input_size can be tuple or InputSpec. if model + have multiple input, input_size must be a list which contain + every input's shape. + batch_size (int, optional): batch size of input tensor, Default: None. + dtypes (str, optional): if dtypes is None, 'float32' will be used, Default: None. + + Returns: + Dict: a summary of the network including total params and total trainable params. + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + + class LeNet(nn.Layer): + def __init__(self, num_classes=10): + super(LeNet, self).__init__() + self.num_classes = num_classes + self.features = nn.Sequential( + nn.Conv2d( + 1, 6, 3, stride=1, padding=1), + nn.ReLU(), + nn.MaxPool2d(2, 2), + nn.Conv2d( + 6, 16, 5, stride=1, padding=0), + nn.ReLU(), + nn.MaxPool2d(2, 2)) + + if num_classes > 0: + self.fc = nn.Sequential( + nn.Linear(400, 120), + nn.Linear(120, 84), + nn.Linear( + 84, 10)) + + def forward(self, inputs): + x = self.features(inputs) + + if self.num_classes > 0: + x = paddle.flatten(x, 1) + x = self.fc(x) + return x + + lenet = LeNet() + + params_info = paddle.summary(lenet, (1, 28, 28)) + print(params_info) + + """ + if isinstance(input_size, InputSpec): + _input_size = tuple(input_size.shape[1:]) + if batch_size is None: + batch_size = input_size.shape[0] + elif isinstance(input_size, list): + _input_size = [] + for item in input_size: + if isinstance(item, int): + item = (item, ) + assert isinstance(item, + (tuple, InputSpec)), 'When input_size is list, \ + expect item in input_size is a tuple or InputSpec, but got {}'.format( + type(item)) + + if isinstance(item, InputSpec): + _input_size.append(tuple(item.shape[1:])) + if batch_size is None: + batch_size = item.shape[0] + else: + _input_size.append(item) + elif isinstance(input_size, int): + _input_size = (input_size, ) + else: + _input_size = input_size + + if batch_size is None: + batch_size = -1 + + result, params_info = summary_string(net, _input_size, batch_size, dtypes) + print(result) + + return params_info + + +def summary_string(model, input_size, batch_size=-1, dtypes=None): + if dtypes == None: + dtypes = ['float32'] * len(input_size) + + summary_str = '' + + depth = len(list(model.sublayers())) + + def register_hook(module): + def hook(module, input, output): + class_name = str(module.__class__).split(".")[-1].split("'")[0] + + try: + module_idx = int(module._full_name.split('_')[-1]) + except: + module_idx = len(summary) + + m_key = "%s-%i" % (class_name, module_idx + 1) + summary[m_key] = OrderedDict() + summary[m_key]["input_shape"] = list(input[0].shape) + summary[m_key]["input_shape"][0] = batch_size + if isinstance(output, (list, tuple)): + summary[m_key]["output_shape"] = [[-1] + list(o.shape)[1:] + for o in output] + else: + summary[m_key]["output_shape"] = list(output.shape) + summary[m_key]["output_shape"][0] = batch_size + + params = 0 + if hasattr(module, "weight") and hasattr(module.weight, "shape"): + params += np.prod(module.weight.shape) + summary[m_key]["trainable"] = module.weight.trainable or ( + not module.weight.stop_gradient) + if hasattr(module, "bias") and hasattr(module.bias, "shape"): + params += np.prod(module.bias.shape) + summary[m_key]["nb_params"] = params + + if (not isinstance(module, nn.Sequential) and + not isinstance(module, nn.LayerList) and + (not (module == model) or depth < 1)): + + hooks.append(module.register_forward_post_hook(hook)) + + if isinstance(input_size, tuple): + input_size = [input_size] + + x = [ + paddle.rand( + 
[2] + list(in_size), dtype=dtype) + for in_size, dtype in zip(input_size, dtypes) + ] + + # create properties + summary = OrderedDict() + hooks = [] + + # register hook + model.apply(register_hook) + + # make a forward pass + model(*x) + + # remove these hooks + for h in hooks: + h.remove() + + table_width = 80 + summary_str += "-" * table_width + "\n" + line_new = "{:>15} {:>20} {:>20} {:>15}".format( + "Layer (type)", "Input Shape", "Output Shape", "Param #") + summary_str += line_new + "\n" + summary_str += "=" * table_width + "\n" + total_params = 0 + total_output = 0 + trainable_params = 0 + for layer in summary: + # input_shape, output_shape, trainable, nb_params + line_new = "{:>15} {:>20} {:>20} {:>15}".format( + layer, + str(summary[layer]["input_shape"]), + str(summary[layer]["output_shape"]), + "{0:,}".format(summary[layer]["nb_params"]), ) + total_params += summary[layer]["nb_params"] + + total_output += np.prod(summary[layer]["output_shape"]) + if "trainable" in summary[layer]: + if summary[layer]["trainable"] == True: + trainable_params += summary[layer]["nb_params"] + summary_str += line_new + "\n" + + # assume 4 bytes/number (float on cuda). + total_input_size = abs( + np.prod(sum(input_size, ())) * batch_size * 4. / (1024**2.)) + total_output_size = abs(2. * total_output * 4. / + (1024**2.)) # x2 for gradients + total_params_size = abs(total_params * 4. / (1024**2.)) + total_size = total_params_size + total_output_size + total_input_size + + summary_str += "=" * table_width + "\n" + summary_str += "Total params: {0:,}".format(total_params) + "\n" + summary_str += "Trainable params: {0:,}".format(trainable_params) + "\n" + summary_str += "Non-trainable params: {0:,}".format(total_params - + trainable_params) + "\n" + summary_str += "-" * table_width + "\n" + summary_str += "Input size (MB): %0.2f" % total_input_size + "\n" + summary_str += "Forward/backward pass size (MB): %0.2f" % total_output_size + "\n" + summary_str += "Params size (MB): %0.2f" % total_params_size + "\n" + summary_str += "Estimated Total Size (MB): %0.2f" % total_size + "\n" + summary_str += "-" * table_width + "\n" + # return summary + return summary_str, { + 'total_params': total_params, + 'trainable_params': trainable_params + } diff --git a/python/paddle/jit/__init__.py b/python/paddle/jit/__init__.py index 03299a3bb9823d31c40ae4faab601ed89570c71e..d04a65ad6ea99ee2e2e67e47fd9d656f1572a02d 100644 --- a/python/paddle/jit/__init__.py +++ b/python/paddle/jit/__init__.py @@ -14,7 +14,6 @@ from ..fluid.dygraph.jit import save #DEFINE_ALIAS from ..fluid.dygraph.jit import load #DEFINE_ALIAS -from ..fluid.dygraph.jit import SaveLoadConfig #DEFINE_ALIAS from ..fluid.dygraph.jit import TracedLayer #DEFINE_ALIAS from ..fluid.dygraph.jit import set_code_level #DEFINE_ALIAS from ..fluid.dygraph.jit import set_verbosity #DEFINE_ALIAS @@ -23,6 +22,6 @@ from ..fluid.dygraph import ProgramTranslator #DEFINE_ALIAS from ..fluid.dygraph.io import TranslatedLayer #DEFINE_ALIAS __all__ = [ - 'save', 'load', 'SaveLoadConfig', 'TracedLayer', 'to_static', - 'ProgramTranslator', 'TranslatedLayer', 'set_code_level', 'set_verbosity' + 'save', 'load', 'TracedLayer', 'to_static', 'ProgramTranslator', + 'TranslatedLayer', 'set_code_level', 'set_verbosity' ] diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 3c0aa9c5c99e545b657559c30fcde46a69781231..325eaa64d5ca4bd3d65bf266ff0a42226a3199e6 100644 --- a/python/paddle/nn/functional/__init__.py +++ 
b/python/paddle/nn/functional/__init__.py @@ -233,3 +233,4 @@ from .vision import space_to_depth #DEFINE_ALIAS from .vision import yolo_box #DEFINE_ALIAS from .vision import yolov3_loss #DEFINE_ALIAS from .input import one_hot #DEFINE_ALIAS +from .input import embedding #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 623af3277fba0e29fb77b02c711e258602f1f75a..ad84a32186e8baeabbe8eea7d14e2b7391332944 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -910,12 +910,12 @@ def dropout(x, #get mask shape input_shape = x.shape drop_axes = [axis] if isinstance(axis, int) else axis - if max(drop_axes) > len(input_shape) - 1: - raise ValueError("axis value should less than dimensions of x:{}, but get drop_axes value:{} " \ + if min(drop_axes) < 0 or max(drop_axes) > len(input_shape) - 1: + raise ValueError("axis value should be greater than or equal to 0 and less than dimensions of x:{}, but get axis value:{} " \ .format(len(input_shape), max(drop_axes))) if len(drop_axes) > len(input_shape): raise ValueError( - "length of axis should not greater than dimensions of x:{}, but get length of drop axes: {}". + "length of axis should not be greater than dimensions of x:{}, but get length of axis: {}". format(len(input_shape), len(drop_axes))) mask_shape = [1] * len(input_shape) for i in drop_axes: @@ -1091,6 +1091,8 @@ def alpha_dropout(x, p=0.5, training=True, name=None): 'alpha_dropout') if training: + if p == 1: + return layers.scale(x, scale=0.) #get transformation params alpha = 1.6732632423543772848170429916717 scale = 1.0507009873554804934193349852946 diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 42d7d98aefcbbf51f562b98c4c494aeccfe20cf2..3c1482e69c3c36232ee5d70f2156a8d16c2d212a 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -232,7 +232,7 @@ def conv1d(x, raise ValueError("Attr(data_format) should be 'NCL' or 'NLC'. " "Received Attr(data_format): {}.".format(data_format)) - channel_last = (data_format == "NHWC") + channel_last = (data_format == "NLC") channel_dim = -1 if channel_last else 1 conv2d_data_format = "NHWC" if channel_last else "NCHW" num_channels = x.shape[channel_dim] @@ -399,7 +399,7 @@ def conv2d(x, `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, and when `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form + when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. dilation (int|tuple): The dilation size. It means the spacing between the kernel @@ -733,20 +733,31 @@ def conv_transpose1d(x, stride = utils.convert_to_list(stride, 1, 'stride') + [1] dilation = utils.convert_to_list(dilation, 1, 'dilation') + [1] - output_padding = utils.convert_to_list(output_padding, 1, - 'output_padding') + [0] - if output_padding[0] > stride[0]: - raise ValueError( - "The size of output_padding should not be greater than stride." 
- "But got output_padding={} and stride={}".format(output_padding[0], - stride[0])) if output_size is None: output_size = [] - elif isinstance(output_size, (list, tuple, int)): - output_size = utils.convert_to_list(output_size, 1, 'output_size') + [1] else: - raise ValueError("output_size should be int, or list, tuple of ints") + if output_padding != 0: + raise ValueError('output_padding option is mutually exclusive with ' + 'output_size') + if isinstance(output_size, (list, tuple, int)): + output_size = utils.convert_to_list(output_size, 1, + 'output_size') + [1] + else: + raise ValueError( + "output_size should be int, or list, tuple of ints") + + if output_padding == 0: + output_padding = [] + else: + output_padding = utils.convert_to_list(output_padding, 1, + 'output_padding') + [0] + + if len(output_padding) > 0 and output_padding[0] > stride[0]: + raise ValueError( + "The size of output_padding should not be greater than stride." + "But got output_padding={} and stride={}".format(output_padding[0], + stride[0])) op_type = 'conv2d_transpose' num_filters = weight.shape[1] @@ -761,16 +772,17 @@ def conv_transpose1d(x, weight = nn.unsqueeze(input=weight, axes=[-1]) if in_dygraph_mode(): - attrs = ('output_size', output_size, 'strides', stride, 'paddings', - padding, 'padding_algorithm', padding_algorithm, 'dilations', - dilation, 'groups', groups, 'use_cudnn', use_cudnn, - 'data_format', conv2d_data_format) + attrs = ('output_padding', output_padding, 'output_size', output_size, + 'strides', stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'dilations', dilation, 'groups', groups, + 'use_cudnn', use_cudnn, 'data_format', conv2d_data_format) out = getattr(core.ops, op_type)(x, weight, *attrs) if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) else: inputs = {'Input': [x], 'Filter': [weight]} attrs = { + 'output_padding': output_padding, 'output_size': output_size, 'strides': stride, 'paddings': padding, @@ -791,12 +803,6 @@ def conv_transpose1d(x, if bias is not None: out = nn.elementwise_add(out, bias, axis=channel_dim) - if output_size is None: - out = pad2d( - out, - padding=[0, output_padding, 0, 0], - data_format=conv2d_data_format, - name=name) out = nn.squeeze(input=out, axes=[squeeze_axis]) return out @@ -888,9 +894,9 @@ def conv_transpose2d(x, 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_height, pad_width]` or `[pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NHWC"`, `pool_padding` can be in the form + when `data_format` is `"NHWC"`, `padding` can be in the form `[[0,0], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. output_padding(int|list|tuple, optional): Additional size added to one side @@ -1116,9 +1122,9 @@ def conv3d(x, 'SAME' which is the padding algorithm. 
If padding size is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCDHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. dilation (int|tuple): The dilation size. It means the spacing between the kernel points. @@ -1340,9 +1346,9 @@ def conv_transpose3d(x, 'SAME' which is the padding algorithm. If padding size is a tuple or list, it could be in three forms: `[pad_depth, pad_height, pad_width]` or `[pad_depth_front, pad_depth_back, pad_height_top, pad_height_bottom, pad_width_left, pad_width_right]`, - and when `data_format` is `"NCDHW"`, `pool_padding` can be in the form + and when `data_format` is `"NCDHW"`, `padding` can be in the form `[[0,0], [0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right]]`. - when `data_format` is `"NDHWC"`, `pool_padding` can be in the form + when `data_format` is `"NDHWC"`, `padding` can be in the form `[[0,0], [pad_depth_front, pad_depth_back], [pad_height_top, pad_height_bottom], [pad_width_left, pad_width_right], [0,0]]`. Default: padding = 0. output_padding(int|list|tuple, optional): Additional size added to one side diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index e77bf0e39672984f7076938b134f3e54f4c761ab..bc48cc21c29e6683602f37fb3eab6c9485fe4977 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -19,7 +19,7 @@ from ...fluid.layer_helper import LayerHelper from ...fluid.layers import core from ...fluid.data_feeder import check_variable_and_dtype, check_dtype -__all__ = ['one_hot'] +__all__ = ['one_hot', 'embedding'] def one_hot(x, num_classes, name=None): @@ -83,6 +83,7 @@ def one_hot(x, num_classes, name=None): # [0., 1., 0., 0.], # [0., 0., 0., 1.], # [1., 0., 0., 0.]] + """ if in_dygraph_mode(): @@ -94,7 +95,7 @@ def one_hot(x, num_classes, name=None): one_hot_out = helper.create_variable_for_type_inference(dtype='float32') if not isinstance(num_classes, Variable): - # user attribute + # user attribute inputs = {'X': x} attrs = {'depth': num_classes, 'allow_out_of_range': False} else: @@ -108,3 +109,115 @@ def one_hot(x, num_classes, name=None): outputs={'Out': one_hot_out}, stop_gradient=True) return one_hot_out + + +def embedding(x, weight, padding_idx=None, sparse=False, name=None): + """ + The operator is used to lookup embeddings vector of ids provided by :attr:`input` . + + The shape of output Tensor is generated by appending the last dimension of the input Tensor shape + with embedding size. + **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < weight.shape[0]` , + otherwise the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + input is a Tensor. 
+ padding_idx = -1 + x.data = [[1, 3], [2, 4], [4, 127]] + x.shape = [3, 2] + weight.shape = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + + The input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when ids is 127. + + Args: + x(Tensor): A Tensor with type int32/int64, which contains the id information. The value of the input id should + satisfy :math:`0<= id < weight.shape[0]` . + weight (Tensor): The weight. A Tensor with shape of lookup table parameter. It should have two elements which + indicates the size of the dictionary of embeddings and the size of each embedding vector respectively. + sparse(bool): The flag indicating whether to use sparse update. This parameter only + affects the performance of the backwards gradient update. It is recommended to set + True because sparse update is faster. But some optimizers does not support sparse update, + such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` , + :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` , + :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` . + In these cases, is_sparse must be False. Default: False. + padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup + encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. + If set None, it makes no effect to output. Default: None. + name(str|None): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name is no need to set and + None by default. + + Returns: + Tensor: Embedding Tensor mapped by input. The data type is the same as :attr:`weight`. + + Examples: + + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn.functional as F + + paddle.disable_static() + + # ids must be int32/int64 and satisfy 0 <= id < weight.shape[0] + x = paddle.to_tensor(np.array([[3], [4], [5]]).astype('int64')) + weight = paddle.to_tensor( + np.random.random((10, 4)).astype('float32')) + + emb = F.embedding( + x=x, weight=weight, sparse=True, name="embedding") + # emb.shape is [3, 1, 4] + + """ + if in_dygraph_mode(): + return core.ops.lookup_table_v2( + weight, x, 'is_sparse', sparse, 'is_distributed', False, + 'remote_prefetch', False, 'padding_idx', padding_idx) + else: + helper = LayerHelper('embedding', **locals()) + dtype = helper.input_dtype() + + check_variable_and_dtype(x, 'input', ['int32', 'int64'], 'embedding') + + is_distributed = False + remote_prefetch = sparse and (not is_distributed) + + tmp = helper.create_variable_for_type_inference(dtype) + padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( + weight.shape[0] + padding_idx) + + helper.append_op( + type='lookup_table_v2', + inputs={'Ids': x, + 'W': weight}, + outputs={'Out': tmp}, + attrs={ + 'is_sparse': sparse, + 'is_distributed': is_distributed, + 'remote_prefetch': remote_prefetch, + 'padding_idx': padding_idx + }) + return tmp diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index d2ddee654f4d04de152d15130ba53c424af3e5b2..3d5894064c44cb72259472fc638d46b67c5703fc 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -138,13 +138,10 @@ def binary_cross_entropy(input, label, weight=None, reduction='mean', .. code-block:: python import paddle - import numpy as np - input_data = np.array([0.5, 0.6, 0.7]).astype("float32") - label_data = np.array([1.0, 0.0, 1.0]).astype("float32") paddle.disable_static() - input = paddle.to_tensor(input_data) - label = paddle.to_tensor(label_data) + input = paddle.to_tensor([0.5, 0.6, 0.7], 'float32') + label = paddle.to_tensor([1.0, 0.0, 1.0], 'float32') output = paddle.nn.functional.binary_cross_entropy(input, label) print(output.numpy()) # [0.65537095] @@ -277,8 +274,8 @@ def binary_cross_entropy_with_logits(logit, import paddle paddle.disable_static() - logit = paddle.to_tensor([5.0, 1.0, 3.0], dtype="float32") - label = paddle.to_tensor([1.0, 0.0, 1.0], dtype="float32") + logit = paddle.to_tensor([5.0, 1.0, 3.0]) + label = paddle.to_tensor([1.0, 0.0, 1.0]) output = paddle.nn.functional.binary_cross_entropy_with_logits(logit, label) print(output.numpy()) # [0.45618808] @@ -569,13 +566,10 @@ def l1_loss(input, label, reduction='mean', name=None): Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() - input_data = np.array([[1.5, 0.8], [0.2, 1.3]]).astype("float32") - label_data = np.array([[1.7, 1], [0.4, 0.5]]).astype("float32") - input = paddle.to_tensor(input_data) - label = paddle.to_tensor(label_data) + input = paddle.to_tensor([[1.5, 0.8], [0.2, 1.3]]) + label = paddle.to_tensor([[1.7, 1], [0.4, 0.5]]) l1_loss = paddle.nn.functional.l1_loss(input, label) print(l1_loss.numpy()) @@ -868,7 +862,7 @@ def mse_loss(input, label, reduction='mean', name=None): Examples: ..
code-block:: python - import numpy as np + import paddle @@ -878,8 +872,6 @@ def mse_loss(input, label, reduction='mean', name=None): input = paddle.data(name="input", shape=[1]) label = paddle.data(name="label", shape=[1]) place = paddle.CPUPlace() - input_data = np.array([1.5]).astype("float32") - label_data = np.array([1.7]).astype("float32") output = mse_loss(input,label) exe = paddle.static.Executor(place) @@ -894,8 +886,8 @@ def mse_loss(input, label, reduction='mean', name=None): # dynamic graph mode paddle.disable_static() - input = paddle.to_variable(input_data) - label = paddle.to_variable(label_data) + input = paddle.to_tensor(1.5) + label = paddle.to_tensor(1.7) output = mse_loss(input, label) print(output.numpy()) # [0.04000002] diff --git a/python/paddle/nn/functional/norm.py b/python/paddle/nn/functional/norm.py index e9c1a21ecffb1b64cb5ae9e6b802600625cb4685..9e8f365f6d23a95275b9a696f6088bb287108ec0 100644 --- a/python/paddle/nn/functional/norm.py +++ b/python/paddle/nn/functional/norm.py @@ -165,7 +165,7 @@ def batch_norm(x, w = paddle.to_tensor(weight_data) b = paddle.to_tensor(bias_data) batch_norm_out = paddle.nn.functional.batch_norm(x, rm, rv, w, b) - print batch_norm_out + print(batch_norm_out.numpy()) """ assert len(x.shape) >= 2, "input dim must be larger than 1" @@ -176,6 +176,14 @@ def batch_norm(x, mean_out = running_mean variance_out = running_var + true_data_format = ['NC', 'NCL', 'NCHW', 'NCDHW'] + if data_format not in true_data_format: + raise ValueError( + "data_format must be one of 'NC', 'NCL', 'NCHW', 'NCDHW', but receive {}". + format(data_format)) + + data_format = 'NCHW' + if in_dygraph_mode(): # for dygraph need tuple attrs = ("momentum", momentum, "epsilon", epsilon, "data_layout", @@ -270,7 +278,7 @@ def layer_norm(x, layer_norm = paddle.nn.functional.layer_norm(x, x.shape[1:]) layer_norm_out = layer_norm(x) - print(layer_norm_out.numpy) + print(layer_norm_out.numpy()) """ input_shape = list(x.shape) input_ndim = len(input_shape) @@ -302,10 +310,10 @@ def layer_norm(x, # create output helper = LayerHelper('layer_norm', **locals()) mean_out = helper.create_variable_for_type_inference( - dtype=x.type, stop_gradient=True) + dtype=x.dtype, stop_gradient=True) variance_out = helper.create_variable_for_type_inference( - dtype=x.type, stop_gradient=True) - layer_norm_out = helper.create_variable_for_type_inference(x.type) + dtype=x.dtype, stop_gradient=True) + layer_norm_out = helper.create_variable_for_type_inference(x.dtype) helper.append_op( type="layer_norm", @@ -362,7 +370,7 @@ def instance_norm(x, x = paddle.to_tensor(x_data) instance_norm_out = paddle.nn.functional.instancenorm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ diff --git a/python/paddle/nn/functional/pooling.py b/python/paddle/nn/functional/pooling.py index c8790a75901fd5d9a38862158246e3756dc575c4..662205ab69550255406ff5edfda4556b73b98843 100755 --- a/python/paddle/nn/functional/pooling.py +++ b/python/paddle/nn/functional/pooling.py @@ -168,7 +168,7 @@ def avg_pool1d(x, count_include_pad=True, ceil_mode=False, name=None): - """ + """ This API implements average pooling 1d operation, See more details in :ref:`api_nn_pooling_AvgPool1d` . @@ -280,7 +280,7 @@ def avg_pool2d(x, """ This API implements average pooling 2d operation. See more details in :ref:`api_nn_pooling_AvgPool2d` . - + Args: x (Tensor): The input tensor of pooling operator which is a 4-D tensor with shape [N, C, H, W]. 
The format of input tensor is `"NCHW"` or @@ -640,7 +640,7 @@ def max_pool2d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): when True, will use `ceil` instead of `floor` to compute the output shape - return_indices (bool): Whether to return the max indices along with the outputs. + return_indices (bool): Whether to return the max indices along with the outputs. Default False, only support `"NCHW"` data format data_format (string): The data format of the input and output data. An optional string from: `"NCHW"`, `"NHWC"`. The default is `"NCHW"`. When it is `"NCHW"`, the data is stored in the order of: `[batch_size, input_channels, input_height, input_width]`. @@ -690,15 +690,30 @@ def max_pool2d(x, padding, padding_algorithm = _update_padding_nd( padding, num_dims=2, channel_last=channel_last, ceil_mode=ceil_mode) + if data_format == "NHWC" and return_indices: + raise ValueError( + "When setting return_indices to true, data_format must be set to NCHW in API:max_pool2d" + ) + if in_dygraph_mode(): - output = core.ops.max_pool2d_with_index( - x, 'ksize', kernel_size, 'global_pooling', False, 'strides', stride, - 'paddings', padding, 'padding_algorithm', padding_algorithm, - 'use_cudnn', True, 'ceil_mode', ceil_mode, 'use_mkldnn', False, - 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + if data_format == "NCHW": + output = core.ops.max_pool2d_with_index( + x, 'ksize', kernel_size, 'global_pooling', False, 'strides', + stride, 'paddings', padding, 'padding_algorithm', + padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, + 'use_mkldnn', False, 'exclusive', True, 'data_format', + data_format) + return output if return_indices else output[0] + elif data_format == "NHWC" and not return_indices: + output = core.ops.pool2d( + x, 'pooling_type', 'max', 'ksize', kernel_size, + 'global_pooling', False, 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, 'use_cudnn', True, + 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) + return output - op_type = 'max_pool2d_with_index' + op_type = 'max_pool2d_with_index' if data_format == "NCHW" else "max_pool2d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -739,7 +754,7 @@ def max_pool3d(x, See more details in :ref:`api_nn_pooling_MaxPool3d` . Args: x (Tensor): The input tensor of pooling operator, which is a 5-D tensor with - shape [N, C, D, H, W]. The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively. + shape [N, C, D, H, W]. The format of input tensor is `"NCDHW"` or `"NDHWC"`, where N represents batch size, C represents the number of channels, D, H and W represent the depth, height and width of the feature respectively. kernel_size (int|list|tuple): The pool kernel size. If the kernel size is a tuple or list, it must contain three integers, (kernel_size_Depth, kernel_size_Height, kernel_size_Width). @@ -755,7 +770,7 @@ def max_pool3d(x, 5. A list or tuple of pairs of integers. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. 
Note that, the batch dimension and channel dimension should be [0,0] or (0,0). The default value is 0. ceil_mode (bool): ${ceil_mode_comment} - return_indices (bool): Whether to return the max indices along with the outputs. + return_indices (bool): Whether to return the max indices along with the outputs. Default False. Only support "NDCHW" data_format. data_format (string): The data format of the input and output data. An optional string from: `"NCDHW"`, `"NDHWC"`. The default is `"NCDHW"`. When it is `"NCDHW"`, the data is stored in the order of: `[batch_size, input_channels, input_depth, input_height, input_width]`. @@ -801,15 +816,30 @@ def max_pool3d(x, padding, padding_algorithm = _update_padding_nd( padding, 3, channel_last=channel_last, ceil_mode=ceil_mode) + if data_format == "NDHWC" and return_indices: + raise ValueError( + "When setting return_indices to true, data_format must be set to NCDHW in API:max_pool3d" + ) + if in_dygraph_mode(): - output = core.ops.max_pool3d_with_index( - x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides', stride, - 'paddings', padding, 'global_pooling', False, 'padding_algorithm', - padding_algorithm, 'use_cudnn', True, 'ceil_mode', ceil_mode, - 'use_mkldnn', False, 'exclusive', True, 'data_format', data_format) - return output if return_indices else output[0] + if data_format == "NCDHW": + output = core.ops.max_pool3d_with_index( + x, 'pooling_type', 'max', 'ksize', kernel_size, 'strides', + stride, 'paddings', padding, 'global_pooling', False, + 'padding_algorithm', padding_algorithm, 'use_cudnn', True, + 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) + return output if return_indices else output[0] + elif data_format == "NDHWC" and not return_indices: + output = core.ops.pool3d( + x, 'pooling_type', 'max', 'ksize', kernel_size, + 'global_pooling', False, 'padding_algorithm', padding_algorithm, + 'strides', stride, 'paddings', padding, 'use_cudnn', True, + 'ceil_mode', ceil_mode, 'use_mkldnn', False, 'exclusive', True, + 'data_format', data_format) + return output - op_type = "max_pool3d_with_index" + op_type = "max_pool3d_with_index" if data_format == "NCDHW" else "max_pool3d" helper = LayerHelper(op_type, **locals()) dtype = helper.input_dtype() pool_out = helper.create_variable_for_type_inference(dtype) @@ -841,14 +871,13 @@ def adaptive_avg_pool1d(x, output_size, name=None): """ This API implements adaptive average pooling 1d operation. See more details in :ref:`api_nn_pooling_AdaptiveAvgPool1d` . - + Args: x (Tensor): The input tensor of pooling operator, which is a 3-D tensor with shape [N, C, L]. The format of input tensor is NCL, where N is batch size, C is the number of channels, L is the length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. + output_size (int): The target output size. It must be an integer. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -856,7 +885,7 @@ def adaptive_avg_pool1d(x, output_size, name=None): Tensor: The output tensor of adaptive average pooling result. The data type is same as input tensor. Raises: - ValueError: 'output_size' should be an integer or list or tuple with length as 1. + ValueError: 'output_size' should be an integer. Examples: .. 
code-block:: python # average adaptive pool1d @@ -977,6 +1006,7 @@ def adaptive_avg_pool2d(x, output_size, data_format='NCHW', name=None): if isinstance(output_size, int): output_size = utils.convert_to_list(output_size, 2, 'output_size') else: + output_size = list(output_size) if output_size[0] == None: output_size[0] = in_h if output_size[1] == None: @@ -1080,6 +1110,7 @@ def adaptive_avg_pool3d(x, output_size, data_format='NCDHW', name=None): if isinstance(output_size, int): output_size = utils.convert_to_list(output_size, 3, 'output_size') else: + output_size = list(output_size) if output_size[0] == None: output_size[0] = in_l if output_size[1] == None: @@ -1124,8 +1155,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): with shape [N, C, L]. The format of input tensor is NCL, where N is batch size, C is the number of channels, L is the length of the feature. The data type is float32 or float64. - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. + output_size (int): The pool kernel size. The value should be an integer. return_indices (bool): If true, the index of max pooling point will be returned along with outputs. It cannot be set in average pooling type. Default False. name(str, optional): For detailed information, please refer @@ -1135,9 +1165,10 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): Tensor: The output tensor of adaptive pooling result. The data type is same as input tensor. Raises: - ValueError: 'output_size' should be a integer or list or tuple with length as 1. + ValueError: 'output_size' should be an integer. Examples: .. code-block:: python + # max adaptive pool1d # suppose input data in shape of [N, C, L], `output_size` is m or [m], # output shape is [N, C, m], adaptive pool divide L dimension @@ -1163,7 +1194,7 @@ def adaptive_max_pool1d(x, output_size, return_indices=False, name=None): check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'adaptive_max_pool1d') _check_input(x, 3) - check_type(output_size, 'pool_size', (int), 'adaptive_max_pool1d') + check_type(output_size, 'pool_size', int, 'adaptive_max_pool1d') check_type(return_indices, 'return_indices', bool, 'adaptive_max_pool1d') pool_size = [1] + utils.convert_to_list(output_size, 1, 'pool_size') @@ -1202,15 +1233,19 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): """ This operation applies a 2D adaptive max pooling on input tensor. See more details in :ref:`api_nn_pooling_AdaptiveMaxPool2d` . + Args: x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float16, float32, float64, int32 or int64. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain two elements, (H, W). H and W can be either a int, or None which means the size will be the same as that of the input. return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: Tensor: The output tensor of adaptive max pool2d result. The data type is same as input tensor. + Examples: .. 
code-block:: python + # max adaptive pool2d # suppose input data in the shape of [N, C, H, W], `output_size` is [m, n] # output shape is [N, C, m, n], adaptive pool divide H and W dimensions @@ -1248,6 +1283,7 @@ def adaptive_max_pool2d(x, output_size, return_indices=False, name=None): if isinstance(output_size, int): output_size = utils.convert_to_list(output_size, 2, 'output_size') else: + output_size = list(output_size) if output_size[0] == None: output_size[0] = in_h if output_size[1] == None: @@ -1284,15 +1320,19 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): """ This operation applies a 3D adaptive max pooling on input tensor. See more details in :ref:`api_nn_pooling_AdaptiveMaxPool3d` . + Args: x (Tensor): The input tensor of adaptive max pool3d operator, which is a 5-D tensor. The data type can be float32, float64. output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as that of the input. return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. + Returns: Tensor: The output tensor of adaptive max pool3d result. The data type is same as input tensor. + Examples: .. code-block:: python + # adaptive max pool3d # suppose input data in the shape of [N, C, D, H, W], `output_size` is [l, m, n] # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions @@ -1334,6 +1374,7 @@ def adaptive_max_pool3d(x, output_size, return_indices=False, name=None): if isinstance(output_size, int): output_size = utils.convert_to_list(output_size, 3, 'output_size') else: + output_size = list(output_size) if output_size[0] == None: output_size[0] = in_l if output_size[1] == None: diff --git a/python/paddle/nn/layer/common.py b/python/paddle/nn/layer/common.py index 8641e28e37b00ed301b95c66d3d2d2d1e3641051..d8e1d03b02840e76ff865986d8b90ca9d6cdd9f8 100644 --- a/python/paddle/nn/layer/common.py +++ b/python/paddle/nn/layer/common.py @@ -15,7 +15,7 @@ # TODO: define the common classes to build a neural network from ...fluid.dygraph import BilinearTensorProduct #DEFINE_ALIAS from ...fluid.dygraph import Pool2D #DEFINE_ALIAS -from ...fluid.dygraph import Embedding #DEFINE_ALIAS +from ...fluid.dygraph import Linear #DEFINE_ALIAS from ...fluid.dygraph import Flatten #DEFINE_ALIAS from ...fluid.dygraph import layers from .. import functional as F @@ -146,9 +146,9 @@ class UpSample(layers.Layer): 'nearest' : Nearest neighbor interpolation 'bicubic' : Bicubic interpolation - Linear interpolation is the method of using a line connecting two known quantities - to determine the value of an unknown quantity between the two known quantities. - + Linear interpolation is the method of using a line connecting two known quantities + to determine the value of an unknown quantity between the two known quantities. + Nearest neighbor interpolation is to perform nearest neighbor interpolation in both the 3rd dimension(in height direction) and the 4th dimension(in width direction) on input tensor. @@ -158,7 +158,7 @@ class UpSample(layers.Layer): W-direction in this op) on a rectilinear 2D grid. The key idea is to perform linear interpolation first in one direction, and then again in the other direction. 
- + Bicubic interpolation is an extension of cubic interpolation for interpolating data points on a two-dimensional regular grid. The interpolated surface is smoother than corresponding surfaces obtained by bilinear interpolation or @@ -205,7 +205,7 @@ class UpSample(layers.Layer): output: (N,C,H_out,W_out) where: H_out = round(H_{in} * scale_{factor}) W_out = round(W_{in} * scale_{factor}) - + Bilinear interpolation: if: align_corners = False , align_mode = 0 @@ -252,19 +252,19 @@ class UpSample(layers.Layer): https://en.wikipedia.org/wiki/Linear_interpolation. For details of linear interpolation, please refer to Wikipedia: - + For details of nearest neighbor interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation. - + For details of bilinear interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Bilinear_interpolation. - + For details of bicubic interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Bicubic_interpolation - + For details of trilinear interpolation, please refer to Wikipedia: https://en.wikipedia.org/wiki/Trilinear_interpolation. - + Parameters: x (Tensor): 3-D, 4-D or 5-D Tensor, its data type is float32, float64, or uint8, its data format is specified by :attr:`data_format`. @@ -537,8 +537,8 @@ class Pad2D(layers.Layer): If mode is 'reflect', paddings[0] and paddings[1] must be no greater than height-1. And the width dimension has the same condition. Parameters: - paddings (int | List[int32]): The padding size. If padding is a int, uses the same - padding in all boundaries, if padding is a List, it must contain four integers, + paddings (int | List[int32]): The padding size. If padding is a int, uses the same + padding in all boundaries, if padding is a List, it must contain four integers, (padding_top, padding_bottom, padding_left, padding_right). Default is [0, 0, 0, 0]. mode (str): Three modes: 'constant' (default), 'reflect', 'edge' . @@ -550,7 +550,7 @@ class Pad2D(layers.Layer): data_format (str): An string from: "NHWC", "NCHW". Specify the data format of the input data. Default is "NCHW" - Returns: + Returns: None Examples: .. code-block:: text @@ -631,11 +631,11 @@ class Bilinear(layers.Layer): in1_features (int): The dimension of each first input(`x1`). in2_features (int): The dimension of each second input(`x2`). out_features (int): The dimension of output of this layer. - weight_attr (ParamAttr, optional): The parameter attribute for the learnable w, parameters/weights of + weight_attr (ParamAttr, optional): The parameter attribute for the learnable w, parameters/weights of this layer. The default value is None. bias_attr (ParamAttr, optional): The parameter attribute for the bias of this layer. If it is set to False, no bias will be added to the output units. - If it is set to None, the bias is initialized zero. The default value is None. + If it is set to None, the bias is initialized zero. The default value is None. name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Default: None. 
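For reference, a small usage sketch consistent with the `Bilinear` parameters documented above; the call signature (two input tensors of shape `[batch, in1_features]` and `[batch, in2_features]`) is assumed from the layer's description, and the output values depend on initialization:

    import numpy as np
    import paddle

    paddle.disable_static()
    x1 = paddle.to_tensor(np.random.random((5, 5)).astype('float32'))
    x2 = paddle.to_tensor(np.random.random((5, 4)).astype('float32'))
    # in1_features=5, in2_features=4, out_features=8
    bilinear = paddle.nn.Bilinear(in1_features=5, in2_features=4, out_features=8)
    result = bilinear(x1, x2)  # expected shape: [5, 8]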
@@ -702,7 +702,7 @@ class Dropout(layers.Layer): """ Dropout is a regularization technique for reducing overfitting by preventing neuron co-adaption during training as described in the paper: - `Improving neural networks by preventing co-adaptation of feature detectors `_ + `Improving neural networks by preventing co-adaptation of feature detectors `_ The dropout operator randomly sets the outputs of some units to zero, while upscale others according to the given dropout probability. @@ -771,8 +771,8 @@ class Dropout2d(layers.Layer): Randomly zero out entire channels (in the batched input 4d tensor with the shape `NCHW` , a channel is a 2D feature map with the shape `HW`). Each channel will be zeroed out independently on every forward call with probability `p` using samples from a Bernoulli distribution. - Dropout2d will help promote independence between feature maps as described in the paper: - `Efficient Object Localization Using Convolutional Networks `_ + Dropout2d will help promote independence between feature maps as described in the paper: + `Efficient Object Localization Using Convolutional Networks `_ See ``paddle.nn.functional.dropout2d`` for more details. @@ -829,8 +829,8 @@ class Dropout3d(layers.Layer): Randomly zero out entire channels (in the batched input 5d tensor with the shape `NCDHW` , a channel is a 3D feature map with the shape `DHW` ). Each channel will be zeroed out independently on every forward call with probability `p` using samples from a Bernoulli distribution. - Dropout3d will help promote independence between feature maps as described in the paper: - `Efficient Object Localization Using Convolutional Networks `_ + Dropout3d will help promote independence between feature maps as described in the paper: + `Efficient Object Localization Using Convolutional Networks `_ See ``paddle.nn.functional.dropout3d`` for more details. @@ -1547,3 +1547,131 @@ class CosineSimilarity(layers.Layer): def forward(self, x1, x2): return F.cosine_similarity(x1, x2, axis=self._axis, eps=self._eps) + + +class Embedding(layers.Layer): + """ + :alias_main: paddle.nn.Embedding + :alias: paddle.nn.Embedding,paddle.nn.layer.Embedding,paddle.nn.layer.common.Embedding + :old_api: paddle.fluid.dygraph.Embedding + + **Embedding Layer** + + This interface is used to construct a callable object of the ``Embedding`` class. + For specific usage, refer to code examples. It implements the function of the Embedding Layer. + This layer is used to lookup embeddings vector of ids provided by :attr:`input` . + It automatically constructs a 2D embedding matrix based on the + input :attr:`size` (vocab_size, emb_size) and :attr:`dtype` . + + The shape of output Tensor is generated by appending an emb_size dimension to the + last dimension of the input Tensor shape. + + **Note:** The id in :attr:`input` must satisfy :math:`0 =< id < size[0]` , + otherwise the program will throw an exception and exit. + + .. code-block:: text + + Case 1: + + input is a Tensor. 
padding_idx = -1 + input.data = [[1, 3], [2, 4], [4, 127]] + input.shape = [3, 2] + Given size = [128, 16] + output is a Tensor: + out.shape = [3, 2, 16] + out.data = [[[0.129435295, 0.244512452, ..., 0.436322452], + [0.345421456, 0.524563927, ..., 0.144534654]], + + [[0.345249859, 0.124939536, ..., 0.194353745], + [0.945345345, 0.435394634, ..., 0.435345365]], + + [[0.945345345, 0.435394634, ..., 0.435345365], + [0.0, 0.0, ..., 0.0 ]]] # padding data + Since the input padding_idx is less than 0, it is automatically converted to padding_idx = -1 + 128 = 127 + It will pad all-zero data when the id is 127. + + Parameters: + num_embeddings (int): The size + of the dictionary of embeddings. + embedding_dim (int): The dimension of each embedding vector. + padding_idx(int|long|None): padding_idx needs to be in the interval [-vocab_size, vocab_size). + If :math:`padding\_idx < 0`, the :math:`padding\_idx` will automatically be converted + to :math:`vocab\_size + padding\_idx` . It will output all-zero padding data whenever lookup + encounters :math:`padding\_idx` in id. And the padding data will not be updated while training. + If set to None, it has no effect on the output. Default: None. + sparse(bool): The flag indicating whether to use sparse update. This parameter only + affects the performance of the backwards gradient update. It is recommended to set it to + True because sparse update is faster. However, some optimizers do not support sparse update, + such as :ref:`api_optimizer_AdadeltaOptimizer` , :ref:`api_optimizer_AdamaxOptimizer` , + :ref:`api_optimizer_DecayedAdagradOptimizer` , :ref:`api_optimizer_FtrlOptimizer` , + :ref:`api_optimizer_LambOptimizer` and :ref:`api_optimizer_LarsMomentumOptimizer` . + In these cases, sparse must be False. Default: False. + weight_attr(ParamAttr, optional): To specify the weight parameter property. Default: None, which means the + default weight parameter property is used. See usage for details in :ref:`api_fluid_ParamAttr` . In addition, + user-defined or pre-trained word vectors can be loaded with the :attr:`weight_attr` parameter. + The local word vectors need to be transformed into numpy format, and their shape should be + consistent with :attr:`num_embeddings` and :attr:`embedding_dim` . Then :ref:`api_fluid_initializer_NumpyArrayInitializer` + is used to load custom or pre-trained word vectors. + name(str|None): For detailed information, please refer + to :ref:`api_guide_Name`. Usually name does not need to be set and + is None by default. + + Attribute: + **weight** (Parameter): the learnable weights of this layer. + + Returns: + None + + Examples: + + .. 
code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + paddle.disable_static() + + # example 1 + inp_word = np.array([[2, 3, 5], [4, 2, 1]]).astype('int64') + inp_word.shape # [2, 3] + dict_size = 20 + + emb = nn.Embedding( + dict_size, + 32, + sparse=False) + """ + + def __init__(self, + num_embeddings, + embedding_dim, + padding_idx=None, + sparse=False, + weight_attr=None, + name=None): + super(Embedding, self).__init__() + self._num_embeddings = num_embeddings + self._embedding_dim = embedding_dim + self._sparse = sparse + self._is_distributed = False + self._padding_idx = -1 if padding_idx is None else padding_idx if padding_idx >= 0 else ( + num_embeddings + padding_idx) + self._dtype = self._helper.get_default_dtype() + self._size = [self._num_embeddings, self._embedding_dim] + + self._weight_attr = weight_attr + self._remote_prefetch = False + self._name = name + self._weight = self.create_parameter( + attr=self._weight_attr, + shape=self._size, + dtype=self._dtype, + is_bias=False) + + def forward(self, x): + return F.embedding( + x, + weight=self._weight, + padding_idx=self._padding_idx, + sparse=self._sparse, + name=self._name) diff --git a/python/paddle/nn/layer/conv.py b/python/paddle/nn/layer/conv.py index 4e342c00528a2c0115940bb7f695e1ed5b582382..a610693a0a46b7e21d2c6d83716a7bc029677583 100644 --- a/python/paddle/nn/layer/conv.py +++ b/python/paddle/nn/layer/conv.py @@ -113,7 +113,7 @@ class _ConvNd(layers.Layer): attr=self._bias_attr, shape=[self._out_channels], is_bias=True) -class Conv1d(layers.Layer): +class Conv1d(_ConvNd): """ This interface is used to construct a callable object of the ``Conv1d`` class. For more details, refer to code examples. @@ -172,8 +172,7 @@ class Conv1d(layers.Layer): When in 'replicate' mode, uses input boundaries to pad the input tensor. When in 'circular' mode, uses circular input to pad the input tensor. Default is 'zeros'. - bias(bool, optional): Whether to use bias. Default: True. - param_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) + weight_attr (ParamAttr, optional): The parameter attribute for learnable weights(Parameter) of conv1d. If it is set to None or one attribute of ParamAttr, conv1d will create ParamAttr as param_attr. If the Initializer of the param_attr is not set, the parameter is initialized with :math:`Normal(0.0, std)`, @@ -227,205 +226,15 @@ class Conv1d(layers.Layer): dilation=1, groups=1, padding_mode='zeros', - bias=True, weight_attr=None, bias_attr=None, - data_format="NCL", - name=None): - super(Conv1d, self).__init__() - assert weight_attr is not False, "param_attr should not be False here." - self._in_channels = in_channels - self._out_channels = out_channels - self._groups = groups - if in_channels % groups != 0: - raise ValueError("in_channels must be divisible by groups.") - self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') - self._stride = utils.convert_to_list(stride, 1, 'stride') - self._dilation = utils.convert_to_list(dilation, 1, 'dilation') - self._padding = padding # leave it to F.conv1d - self._weight_attr = weight_attr - self._bias_attr = bias_attr - self._data_format = data_format - self._name = name - - self._padding_mode = padding_mode - - valid_padding_modes = {'zeros', 'reflect', 'replicate', 'circular'} - if padding_mode not in valid_padding_modes: - raise ValueError( - "padding_mode must be one of {}, but got padding_mode='{}'". 
- format(valid_padding_modes, padding_mode)) - - if padding_mode in {'reflect', 'replicate', 'circular' - } and not isinstance(padding, np.int): - raise ValueError( - "when padding_mode in ['reflect', 'replicate', 'circular'], type of padding must be int" - ) - if not isinstance(padding, str): - self._padding = utils.convert_to_list(padding, 1, 'padding') * 2 - - num_filter_channels = in_channels // groups - filter_shape = [self._out_channels, num_filter_channels - ] + self._kernel_size - - self.weight = self.create_parameter( - attr=self._weight_attr, - shape=filter_shape, - default_initializer=_get_default_param_initializer( - self._in_channels, filter_shape)) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], - is_bias=True) if bias else None - - def forward(self, x): - padding = 0 - if self._padding_mode != "zeros": - x = F.pad(x, - self._padding, - mode=self._padding_mode, - data_format=self._data_format) - else: - padding = self._padding - - out = F.conv1d( - x, - self.weight, - bias=self.bias, - padding=padding, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format, - name=self._name) - return out - - -class Conv2d(_ConvNd): - """ - This interface is used to construct a callable object of the ``Conv2d`` class. - For more details, refer to code examples. - The convolution2D layer calculates the output based on the input, filter - and strides, paddings, dilations, groups parameters. Input and - Output are in NCHW format, where N is batch size, C is the number of - the feature map, H is the height of the feature map, and W is the width of the feature map. - Filter's shape is [MCHW] , where M is the number of output feature map, - C is the number of input feature map, H is the height of the filter, - and W is the width of the filter. If the groups is greater than 1, - C will equal the number of input feature map divided by the groups. - Please refer to UFLDL's `convolution - `_ - for more details. - If bias attribution and activation type are provided, bias is added to the - output of the convolution, and the corresponding activation function is - applied to the final result. - For each input :math:`X`, the equation is: - - .. math:: - - Out = \sigma (W \\ast X + b) - - Where: - - * :math:`X`: Input value, a ``Tensor`` with NCHW format. - * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . - * :math:`\\ast`: Convolution operation. - * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. - * :math:`\\sigma`: Activation function. - * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. - - Parameters: - in_channels(int): The number of input channels in the input image. - out_channels(int): The number of output channels produced by the convolution. - kernel_size(int|list|tuple, optional): The size of the convolving kernel. - stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must - contain three integers, (stride_H, stride_W). Otherwise, the - stride_H = stride_W = stride. The default value is 1. - padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. - 1. a string in ['valid', 'same']. - 2. an int, which means each spartial dimension(depth, height, width) is zero paded by size of `padding` - 3. a list[int] or tuple[int] whose length is the number of spartial dimensions, which contains the amount of padding on each side for each spartial dimension. 
It has the form [pad_d1, pad_d2, ...]. - 4. a list[int] or tuple[int] whose length is 2 * number of spartial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spartial dimensions. - 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that, the batch dimension and channel dimension are also included. Each pair of integers correspond to the amount of padding for a dimension of the input. Padding in batch dimension and channel dimension should be [0, 0] or (0, 0). - The default value is 0. - dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must - contain three integers, (dilation_D, dilation_H, dilation_W). Otherwise, the - dilation_D = dilation_H = dilation_W = dilation. The default value is 1. - groups(int, optional): The groups number of the Conv3d Layer. According to grouped - convolution in Alex Krizhevsky's Deep CNN paper: when group=2, - the first half of the filters is only connected to the first half - of the input channels, while the second half of the filters is only - connected to the second half of the input channels. The default value is 1. - padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. - weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights - of conv2d. If it is set to None or one attribute of ParamAttr, conv2d - will create ParamAttr as param_attr. If it is set to None, the parameter - is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is - :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. - bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d. - If it is set to False, no bias will be added to the output units. - If it is set to None or one attribute of ParamAttr, conv2d - will create ParamAttr as bias_attr. If the Initializer of the bias_attr - is not set, the bias is initialized zero. The default value is None. - data_format(str, optional): Data format that specifies the layout of input. - It can be "NCHW" or "NHWC". Default: "NCHW". - - Attribute: - - **weight** (Parameter): the learnable weights of filter of this layer. - - **bias** (Parameter or None): the learnable bias of this layer. - - Shape: - - - x: :math:`(N, C_{in}, H_{in}, W_{in})` - - - output: :math:`(N, C_{out}, H_{out}, W_{out})` - - Where - - .. math:: - - H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 - - W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1 - - Examples: - - .. 
code-block:: python - - import numpy as np - import paddle - import paddle.nn as nn - x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') - - paddle.disable_static() - x_var = paddle.to_tensor(x) - conv = nn.Conv2d(4, 6, (3, 3)) - y_var = conv(x_var) - y_np = y_var.numpy() - print(y_np.shape) - - # (2, 6, 6, 6) - """ - - def __init__(self, - in_channels, - out_channels, - kernel_size, - stride=1, - padding=0, - dilation=1, - groups=1, - padding_mode='zeros', - weight_attr=None, - bias_attr=None, - data_format="NCHW"): - super(Conv2d, self).__init__( + data_format="NCL"): + super(Conv1d, self).__init__( in_channels, out_channels, kernel_size, False, - 2, + 1, stride=stride, padding=padding, padding_mode=padding_mode, @@ -436,25 +245,20 @@ class Conv2d(_ConvNd): data_format=data_format) def forward(self, x): - if self._padding_mode != 'zeros': + padding = 0 + if self._padding_mode != "zeros": x = F.pad(x, - self._reversed_padding_repeated_twice, + self._padding, mode=self._padding_mode, data_format=self._data_format) - return F.conv2d( - x, - self.weight, - bias=self.bias, - stride=self._stride, - dilation=self._dilation, - groups=self._groups, - data_format=self._data_format) + else: + padding = self._padding - out = F.conv2d( + out = F.conv1d( x, self.weight, bias=self.bias, - padding=self._padding, + padding=padding, stride=self._stride, dilation=self._dilation, groups=self._groups, @@ -462,7 +266,7 @@ class Conv2d(_ConvNd): return out -class ConvTranspose1d(layers.Layer): +class ConvTranspose1d(_ConvNd): """ This interface is used to construct a callable object of the ``ConvTranspose1d`` class. For more details, refer to code examples. @@ -603,34 +407,24 @@ class ConvTranspose1d(layers.Layer): padding=0, output_padding=0, groups=1, - bias=True, dilation=1, weight_attr=None, bias_attr=None, data_format="NCL"): - super(ConvTranspose1d, self).__init__() - assert weight_attr is not False, "param_attr should not be False in ConvTranspose1d." 
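With ``Conv1d`` rebuilt on top of ``_ConvNd`` above, construction and the NCL shape contract are unchanged; a small sketch with illustrative shapes (the default ``padding=0`` and ``stride=1`` give ``L_out = L_in - kernel_size + 1``):

.. code-block:: python

    import numpy as np
    import paddle
    import paddle.nn as nn

    paddle.disable_static()
    # N=2 samples, C=4 channels, L=8 steps (the default "NCL" layout)
    x = paddle.to_tensor(np.random.uniform(-1, 1, (2, 4, 8)).astype('float32'))
    conv = nn.Conv1d(in_channels=4, out_channels=6, kernel_size=3)
    y = conv(x)
    print(y.numpy().shape)  # (2, 6, 6): L_out = 8 - 3 + 1 = 6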
- self._param_attr = weight_attr - self._bias_attr = bias_attr - self._groups = groups - self._in_channels = in_channels - self._out_channels = out_channels - self._output_padding = output_padding - self._data_format = data_format - self._bias = bias - - self._stride = utils.convert_to_list(stride, 1, 'stride') - self._dilation = utils.convert_to_list(dilation, 1, 'dilation') - self._kernel_size = utils.convert_to_list(kernel_size, 1, 'kernel_size') - self._padding = padding - - filter_shape = [self._in_channels, out_channels // groups - ] + self._kernel_size - self.weight = self.create_parameter( - shape=filter_shape, attr=self._param_attr) - self.bias = self.create_parameter( - attr=self._bias_attr, shape=[self._out_channels], - is_bias=True) if self._bias else None + super(ConvTranspose1d, self).__init__( + in_channels, + out_channels, + kernel_size, + True, + 1, + stride=stride, + padding=padding, + dilation=dilation, + output_padding=output_padding, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) def forward(self, x, output_size=None): out = F.conv_transpose1d( @@ -638,7 +432,169 @@ class ConvTranspose1d(layers.Layer): self.weight, bias=self.bias, output_size=output_size, - output_padding=self._output_padding, + output_padding=self.output_padding, + padding=self._padding, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + return out + + +class Conv2d(_ConvNd): + """ + This interface is used to construct a callable object of the ``Conv2d`` class. + For more details, refer to code examples. + The convolution2D layer calculates the output based on the input, filter + and strides, paddings, dilations, groups parameters. Input and + Output are in NCHW format, where N is batch size, C is the number of + the feature map, H is the height of the feature map, and W is the width of the feature map. + Filter's shape is [MCHW] , where M is the number of output feature map, + C is the number of input feature map, H is the height of the filter, + and W is the width of the filter. If the groups is greater than 1, + C will equal the number of input feature map divided by the groups. + Please refer to UFLDL's `convolution + `_ + for more details. + If bias attribution and activation type are provided, bias is added to the + output of the convolution, and the corresponding activation function is + applied to the final result. + For each input :math:`X`, the equation is: + + .. math:: + + Out = \sigma (W \\ast X + b) + + Where: + + * :math:`X`: Input value, a ``Tensor`` with NCHW format. + * :math:`W`: Filter value, a ``Tensor`` with shape [MCHW] . + * :math:`\\ast`: Convolution operation. + * :math:`b`: Bias value, a 2-D ``Tensor`` with shape [M, 1]. + * :math:`\\sigma`: Activation function. + * :math:`Out`: Output value, the shape of :math:`Out` and :math:`X` may be different. + + Parameters: + in_channels(int): The number of input channels in the input image. + out_channels(int): The number of output channels produced by the convolution. + kernel_size(int|list|tuple, optional): The size of the convolving kernel. + stride(int|list|tuple, optional): The stride size. If stride is a tuple, it must + contain three integers, (stride_H, stride_W). Otherwise, the + stride_H = stride_W = stride. The default value is 1. + padding(int|str|tuple|list, optional): The padding size. Padding coule be in one of the following forms. + 1. a string in ['valid', 'same']. + 2. 
an int, which means each spatial dimension (height, width) is zero-padded by size of `padding` + 3. a list[int] or tuple[int] whose length is the number of spatial dimensions, which contains the amount of padding on each side for each spatial dimension. It has the form [pad_d1, pad_d2, ...]. + 4. a list[int] or tuple[int] whose length is 2 * number of spatial dimensions. It has the form [pad_before, pad_after, pad_before, pad_after, ...] for all spatial dimensions. + 5. a list or tuple of pairs of ints. It has the form [[pad_before, pad_after], [pad_before, pad_after], ...]. Note that the batch dimension and channel dimension are also included. Each pair of integers corresponds to the amount of padding for a dimension of the input. Padding in the batch dimension and channel dimension should be [0, 0] or (0, 0). + The default value is 0. + dilation(int|list|tuple, optional): The dilation size. If dilation is a tuple, it must + contain two integers, (dilation_H, dilation_W). Otherwise, + dilation_H = dilation_W = dilation. The default value is 1. + groups(int, optional): The groups number of the Conv2d Layer. According to grouped + convolution in Alex Krizhevsky's Deep CNN paper: when group=2, + the first half of the filters is only connected to the first half + of the input channels, while the second half of the filters is only + connected to the second half of the input channels. The default value is 1. + padding_mode(str, optional): ``'zeros'``, ``'reflect'``, ``'replicate'`` or ``'circular'``. Default: ``'zeros'``. + weight_attr(ParamAttr, optional): The parameter attribute for learnable parameters/weights + of conv2d. If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as param_attr. If it is set to None, the parameter + is initialized with :math:`Normal(0.0, std)`, and the :math:`std` is + :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. The default value is None. + bias_attr(ParamAttr|bool, optional): The parameter attribute for the bias of conv2d. + If it is set to False, no bias will be added to the output units. + If it is set to None or one attribute of ParamAttr, conv2d + will create ParamAttr as bias_attr. If the Initializer of the bias_attr + is not set, the bias is initialized zero. The default value is None. + data_format(str, optional): Data format that specifies the layout of input. + It can be "NCHW" or "NHWC". Default: "NCHW". + + Attribute: + + **weight** (Parameter): the learnable weights of filter of this layer. + + **bias** (Parameter or None): the learnable bias of this layer. + + Shape: + + - x: :math:`(N, C_{in}, H_{in}, W_{in})` + + - output: :math:`(N, C_{out}, H_{out}, W_{out})` + + Where + + .. math:: + + H_{out}&= \\frac{(H_{in} + 2 * paddings[0] - (dilations[0] * (kernel\_size[0] - 1) + 1))}{strides[0]} + 1 + + W_{out}&= \\frac{(W_{in} + 2 * paddings[1] - (dilations[1] * (kernel\_size[1] - 1) + 1))}{strides[1]} + 1 + + Examples: + + .. 
code-block:: python + + import numpy as np + import paddle + import paddle.nn as nn + x = np.random.uniform(-1, 1, (2, 4, 8, 8)).astype('float32') + + paddle.disable_static() + x_var = paddle.to_tensor(x) + conv = nn.Conv2d(4, 6, (3, 3)) + y_var = conv(x_var) + y_np = y_var.numpy() + print(y_np.shape) + + # (2, 6, 6, 6) + """ + + def __init__(self, + in_channels, + out_channels, + kernel_size, + stride=1, + padding=0, + dilation=1, + groups=1, + padding_mode='zeros', + weight_attr=None, + bias_attr=None, + data_format="NCHW"): + super(Conv2d, self).__init__( + in_channels, + out_channels, + kernel_size, + False, + 2, + stride=stride, + padding=padding, + padding_mode=padding_mode, + dilation=dilation, + groups=groups, + weight_attr=weight_attr, + bias_attr=bias_attr, + data_format=data_format) + + def forward(self, x): + if self._padding_mode != 'zeros': + x = F.pad(x, + self._reversed_padding_repeated_twice, + mode=self._padding_mode, + data_format=self._data_format) + return F.conv2d( + x, + self.weight, + bias=self.bias, + stride=self._stride, + dilation=self._dilation, + groups=self._groups, + data_format=self._data_format) + + out = F.conv2d( + x, + self.weight, + bias=self.bias, padding=self._padding, stride=self._stride, dilation=self._dilation, @@ -920,8 +876,8 @@ class Conv3d(_ConvNd): in_channels, out_channels, kernel_size, - padding=0, stride=1, + padding=0, dilation=1, groups=1, padding_mode='zeros', @@ -1128,7 +1084,7 @@ class ConvTranspose3d(_ConvNd): bias_attr=bias_attr, data_format=data_format) - def forward(self, x, output_size): + def forward(self, x, output_size=None): if output_size is None: output_padding = self.output_padding else: diff --git a/python/paddle/nn/layer/norm.py b/python/paddle/nn/layer/norm.py index 4d25418579d74ae896f8ca590400a0a334047e93..d13bf66ba5bfe483284e78dbcd2a42f8f3397210 100644 --- a/python/paddle/nn/layer/norm.py +++ b/python/paddle/nn/layer/norm.py @@ -78,7 +78,7 @@ class _InstanceNormBase(layers.Layer): super(_InstanceNormBase, self).__init__() if weight_attr == False or bias_attr == False: - assert weight_attr == param_attr, "weight_attr and bias_attr must be set to Fasle at the same time in InstanceNorm" + assert weight_attr == bias_attr, "weight_attr and bias_attr must be set to Fasle at the same time in InstanceNorm" self._epsilon = epsilon self._weight_attr = weight_attr self._bias_attr = bias_attr @@ -176,7 +176,7 @@ class InstanceNorm1d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm1d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ @@ -253,7 +253,7 @@ class InstanceNorm2d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm2d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ def _check_input_dim(self, input): @@ -329,7 +329,7 @@ class InstanceNorm3d(_InstanceNormBase): instance_norm = paddle.nn.InstanceNorm3d(2) instance_norm_out = instance_norm(x) - print(instance_norm_out.numpy) + print(instance_norm_out.numpy()) """ def _check_input_dim(self, input): @@ -346,8 +346,8 @@ class GroupNorm(layers.Layer): Refer to `Group Normalization `_ . Parameters: - num_channels(int): The number of channels of input. num_groups(int): The number of groups that divided from channels. + num_channels(int): The number of channels of input. epsilon(float, optional): The small value added to the variance to prevent division by zero. Default: 1e-05. 
weight_attr(ParamAttr|bool, optional): The parameter attribute for the learnable @@ -375,19 +375,19 @@ class GroupNorm(layers.Layer): np.random.seed(123) x_data = np.random.random(size=(2, 6, 2, 2)).astype('float32') x = paddle.to_tensor(x_data) - group_norm = paddle.nn.GroupNorm(num_channels=3, num_groups=6) + group_norm = paddle.nn.GroupNorm(num_channels=6, num_groups=6) group_norm_out = group_norm(x) - print(group_norm_out.numpy) + print(group_norm_out.numpy()) """ def __init__(self, - num_channels, num_groups, + num_channels, epsilon=1e-05, weight_attr=None, bias_attr=None, - data_layout='NCHW', + data_format='NCHW', name=None): super(GroupNorm, self).__init__() self._weight_attr = weight_attr @@ -395,18 +395,33 @@ class GroupNorm(layers.Layer): self._epsilon = epsilon self._num_channels = num_channels self._num_groups = num_groups - if data_layout != 'NCHW': + if data_format != 'NCHW': raise ValueError("unsupported data layout:" + data_layout) param_shape = [self._num_channels] - self.weight = self.create_parameter( - attr=self._weight_attr or False, - shape=param_shape, - default_initializer=Constant(1.0)) + if weight_attr == False: + self.weight = self.create_parameter( + attr=None, shape=param_shape, default_initializer=Constant(1.0)) + self.weight.stop_gradient = True + else: + self.weight = self.create_parameter( + attr=self._weight_attr, + shape=param_shape, + default_initializer=Constant(1.0)) + self.weight.stop_gradient = self._weight_attr != None and self._weight_attr.learning_rate == 0. - self.bias = self.create_parameter( - attr=self._weight_attr or False, shape=param_shape, is_bias=True) + if bias_attr == False: + self.bias = self.create_parameter( + attr=None, + shape=param_shape, + default_initializer=Constant(0.0), + is_bias=True) + self.bias.stop_gradient = True + else: + self.bias = self.create_parameter( + attr=self._bias_attr, shape=param_shape, is_bias=True) + self.bias.stop_gradient = self._bias_attr != None and self._bias_attr.learning_rate == 0. 
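The ``GroupNorm`` constructor above takes ``num_groups`` before ``num_channels`` and accepts ``weight_attr=False`` / ``bias_attr=False`` to freeze the affine parameters. A short sketch mirroring the corrected docstring example; the random input is illustrative:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    np.random.seed(123)
    x = paddle.to_tensor(np.random.random((2, 6, 2, 2)).astype('float32'))
    # 6 channels split into 6 groups, i.e. one channel per group
    group_norm = paddle.nn.GroupNorm(num_groups=6, num_channels=6)
    out = group_norm(x)
    print(out.numpy().shape)  # (2, 6, 2, 2)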
def forward(self, input): inputs = {'X': input} @@ -500,7 +515,7 @@ class LayerNorm(layers.Layer): layer_norm = paddle.nn.LayerNorm(x_data.shape[1:]) layer_norm_out = layer_norm(x) - print(layer_norm_out.numpy) + print(layer_norm_out.numpy()) """ def __init__(self, @@ -603,8 +618,7 @@ class _BatchNormBase(layers.Layer): initializer=Constant(0.0), trainable=False, do_model_average=True), - shape=param_shape, - dtype=self._dtype) + shape=param_shape) self._mean.stop_gradient = True self._variance = self.create_parameter( @@ -613,8 +627,7 @@ class _BatchNormBase(layers.Layer): initializer=Constant(1.0), trainable=False, do_model_average=True), - shape=param_shape, - dtype=self._dtype) + shape=param_shape) self._variance.stop_gradient = True self._data_format = data_format @@ -628,8 +641,13 @@ class _BatchNormBase(layers.Layer): def _check_input_dim(self, input): raise NotImplementedError("BatchNorm Base error") + def _check_data_format(self, input): + raise NotImplementedError("BatchNorm Base data format error") + def forward(self, input): + self._check_data_format(self._data_format) + self._check_input_dim(input) if not self.training and not self._track_running_stats: @@ -730,9 +748,15 @@ class BatchNorm1d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm1d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW' or input == 'NC' or input == 'NCL': + self._data_format = 'NCHW' + else: + raise ValueError('expected NC , NCL or None for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 2 and len(input.shape) != 3: raise ValueError('expected 2D or 3D input (got {}D input)'.format( @@ -787,7 +811,7 @@ class BatchNorm2d(_BatchNormBase): If it is set to None or one attribute of ParamAttr, batch_norm will create ParamAttr as bias_attr. If it is set to Fasle, the weight is not learnable. If the Initializer of the bias_attr is not set, the bias is initialized zero. Default: None. - data_format(str, optional): Specify the input data format, the data format can be "NCHW" or "NHWC". Default: NCHW. + data_format(str, optional): Specify the input data format, the data format can be "NCHW". Default: NCHW. track_running_stats(bool, optional): Whether to use global mean and variance. In train period, True will track global mean and variance used for inference. When inference, track_running_stats must be True. Default: True. 
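The ``_check_data_format`` hook added above restricts ``BatchNorm1d`` to the 'NC'/'NCL' layouts; a minimal sketch with illustrative data:

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    np.random.seed(123)
    # N=2, C=1, L=3 -- a 3-D "NCL" input, which the 1d check accepts
    x = paddle.to_tensor(np.random.random((2, 1, 3)).astype('float32'))
    batch_norm = paddle.nn.BatchNorm1d(1)
    out = batch_norm(x)
    print(out.numpy().shape)  # (2, 1, 3)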
@@ -816,9 +840,15 @@ class BatchNorm2d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm2d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW': + self._data_format = input + else: + raise ValueError('expected NCHW for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 4: raise ValueError('expected 4D input (got {}D input)'.format( @@ -902,9 +932,15 @@ class BatchNorm3d(_BatchNormBase): batch_norm = paddle.nn.BatchNorm3d(1) batch_norm_out = batch_norm(x) - print(batch_norm_out.numpy) + print(batch_norm_out.numpy()) """ + def _check_data_format(self, input): + if input == 'NCHW' or input == 'NCDHW': + self._data_format = 'NCHW' + else: + raise ValueError('expected NCDHW or None for data_format input') + def _check_input_dim(self, input): if len(input.shape) != 5: raise ValueError('expected 5D input (got {}D input)'.format( diff --git a/python/paddle/nn/layer/pooling.py b/python/paddle/nn/layer/pooling.py index 6f6b567849732ff889db4507708758cd8eeab2a8..129dae93b38327308263550e73031b607b2eacc3 100755 --- a/python/paddle/nn/layer/pooling.py +++ b/python/paddle/nn/layer/pooling.py @@ -87,6 +87,7 @@ class AvgPool1d(layers.Layer): Examples: .. code-block:: python + import paddle import paddle.nn as nn paddle.disable_static() @@ -176,6 +177,7 @@ class AvgPool2d(layers.Layer): ShapeError: If the output's shape calculated is not greater than 0. Examples: .. code-block:: python + import paddle import paddle.nn as nn import numpy as np @@ -267,6 +269,7 @@ class AvgPool3d(layers.Layer): Examples: .. code-block:: python + import paddle import paddle.nn as nn import numpy as np @@ -457,6 +460,7 @@ class MaxPool2d(layers.Layer): Examples: .. code-block:: python + import paddle import paddle.nn as nn import numpy as np @@ -547,6 +551,7 @@ class MaxPool3d(layers.Layer): Examples: .. code-block:: python + import paddle import paddle.nn as nn import numpy as np @@ -613,8 +618,7 @@ class AdaptiveAvgPool1d(layers.Layer): Output(i) &= \\frac{sum(Input[lstart:lend])}{(lstart - lend)} Args: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain one int. + output_size (int): The target output size. It must be an integer. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. @@ -623,7 +627,7 @@ class AdaptiveAvgPool1d(layers.Layer): None. Raises: - ValueError: 'pool_size' should be a integer or list or tuple with length as 1. + ValueError: 'output_size' should be an integer. Shape: - x: 3-D tensor. @@ -850,7 +854,7 @@ class AdaptiveMaxPool1d(layers.Layer): lend &= ceil((i + 1) * L_{in} / L_{out}) - Output(i) &= max(Input[lstart:lend])} + Output(i) &= max(Input[lstart:lend]) Args: output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, @@ -916,8 +920,11 @@ class AdaptiveMaxPool2d(layers.Layer): """ This operation applies 2D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus on the output size. + For adaptive max pool2d: + .. 
math:: + hstart &= floor(i * H_{in} / H_{out}) hend &= ceil((i + 1) * H_{in} / H_{out}) wstart &= floor(j * W_{in} / W_{out}) @@ -932,11 +939,12 @@ class AdaptiveMaxPool2d(layers.Layer): Shape: x (Tensor): The input tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type can be float32, float64. output (Tensor): The output tensor of adaptive max pool2d operator, which is a 4-D tensor. The data type is same as input x. - + Returns: A callable object of AdaptiveMaxPool2d. Examples: .. code-block:: python + # adaptive max pool2d # suppose input data in shape of [N, C, H, W], `output_size` is [m, n], # output shape is [N, C, m, n], adaptive pool divide H and W dimensions @@ -977,10 +985,13 @@ class AdaptiveMaxPool2d(layers.Layer): class AdaptiveMaxPool3d(layers.Layer): """ - This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions + This operation applies 3D adaptive max pooling on input tensor. The h and w dimensions of the output tensor are determined by the parameter output_size. The difference between adaptive pooling and pooling is adaptive one focus on the output size. + For adaptive max pool3d: + .. math:: + dstart &= floor(i * D_{in} / D_{out}) dend &= ceil((i + 1) * D_{in} / D_{out}) hstart &= floor(j * H_{in} / H_{out}) @@ -988,10 +999,9 @@ class AdaptiveMaxPool3d(layers.Layer): wstart &= floor(k * W_{in} / W_{out}) wend &= ceil((k + 1) * W_{in} / W_{out}) Output(i ,j, k) &= max(Input[dstart:dend, hstart:hend, wstart:wend]) + Parameters: - output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, - it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means - the size will be the same as that of the input. + output_size (int|list|tuple): The pool kernel size. If pool kernel size is a tuple or list, it must contain three elements, (D, H, W). D, H and W can be either a int, or None which means the size will be the same as that of the input. return_indices (bool): If true, the index of max pooling point will be returned along with outputs. Default False. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and @@ -1003,6 +1013,7 @@ class AdaptiveMaxPool3d(layers.Layer): A callable object of AdaptiveMaxPool3d. Examples: .. code-block:: python + # adaptive max pool3d # suppose input data in shape of [N, C, D, H, W], `output_size` is [l, m, n], # output shape is [N, C, l, m, n], adaptive pool divide D, H and W dimensions @@ -1029,10 +1040,10 @@ class AdaptiveMaxPool3d(layers.Layer): pool = paddle.nn.AdaptiveMaxPool3d(output_size=4) out = pool(x) # out shape: [2, 3, 4, 4, 4] - pool, indices = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True) - out = pool(x) + pool = paddle.nn.AdaptiveMaxPool3d(output_size=3, return_indices=True) + out, indices = pool(x) # out shape: [2, 3, 4, 4, 4], indices shape: [2, 3, 4, 4, 4] - + """ def __init__(self, output_size, return_indices=False, name=None): diff --git a/python/paddle/nn/layer/transformer.py b/python/paddle/nn/layer/transformer.py index 50a8755ac9f7b0a8e35c60f02a9fb825195ab80f..63069e83952172df3136458ebfee4b446749934d 100644 --- a/python/paddle/nn/layer/transformer.py +++ b/python/paddle/nn/layer/transformer.py @@ -25,12 +25,13 @@ __all__ = [ import copy import collections +from .common import Linear, Dropout +from .norm import LayerNorm +from .. import functional as F +from ... 
import tensor from ...fluid import layers +from ...fluid.dygraph import Layer, LayerList from ...fluid.param_attr import ParamAttr -from ...fluid.dygraph import Layer, Linear, Dropout, LayerNorm, LayerList -from .. import functional as F -from ...fluid.layers import utils -from ...fluid.layers.utils import map_structure def _convert_param_attr_to_list(param_attr, n): @@ -103,7 +104,7 @@ class MultiHeadAttention(Layer): # self attention mask: [batch_size, num_heads, query_len, query_len] attn_mask = paddle.rand((2, 2, 4, 4)) multi_head_attn = paddle.MultiHeadAttention(128, 2) - output = multi_head_attn(query, attn_mask=attn_mask) # [2, 4, 128] + output = multi_head_attn(query, None, None, attn_mask=attn_mask) # [2, 4, 128] """ Cache = collections.namedtuple("Cache", ["k", "v"]) @@ -176,8 +177,8 @@ class MultiHeadAttention(Layer): and their data types are same as inputs. """ q = self.q_proj(query) - q = layers.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) - q = layers.transpose(x=q, perm=[0, 2, 1, 3]) + q = tensor.reshape(x=q, shape=[0, 0, self.num_heads, self.head_dim]) + q = tensor.transpose(x=q, perm=[0, 2, 1, 3]) if isinstance(cache, self.StaticCache): # for encoder-decoder attention in inference and has cached @@ -187,8 +188,8 @@ class MultiHeadAttention(Layer): if isinstance(cache, self.Cache): # for decoder self-attention in inference - k = layers.concat([cache.k, k], axis=2) - v = layers.concat([cache.v, v], axis=2) + k = tensor.concat([cache.k, k], axis=2) + v = tensor.concat([cache.v, v], axis=2) cache = self.Cache(k, v) return (q, k, v) if cache is None else (q, k, v, cache) @@ -219,10 +220,10 @@ class MultiHeadAttention(Layer): """ k = self.k_proj(key) v = self.v_proj(value) - k = layers.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) - k = layers.transpose(x=k, perm=[0, 2, 1, 3]) - v = layers.reshape(x=v, shape=[0, 0, self.num_heads, self.head_dim]) - v = layers.transpose(x=v, perm=[0, 2, 1, 3]) + k = tensor.reshape(x=k, shape=[0, 0, self.num_heads, self.head_dim]) + k = tensor.transpose(x=k, perm=[0, 2, 1, 3]) + v = tensor.reshape(x=v, shape=[0, 0, self.num_heads, self.head_dim]) + v = tensor.transpose(x=v, perm=[0, 2, 1, 3]) return k, v def gen_cache(self, key, value=None, type=Cache): @@ -352,24 +353,25 @@ class MultiHeadAttention(Layer): q, k, v, cache = self._prepare_qkv(query, key, value, cache) # scale dot product attention + # TODO(guosheng): use tensor.matmul, however it doesn't support `alpha` product = layers.matmul( x=q, y=k, transpose_y=True, alpha=self.head_dim**-0.5) if attn_mask is not None: # TODO(guosheng): support bool mask product = product + attn_mask - weights = layers.softmax(product) + weights = F.softmax(product) if self.dropout: - weights = layers.dropout( + weights = F.dropout( weights, - dropout_prob=self.dropout, - dropout_implementation="upscale_in_train", - is_test=False) + self.dropout, + training=self.training, + mode="upscale_in_train") - out = layers.matmul(weights, v) + out = tensor.matmul(weights, v) # combine heads - out = layers.transpose(out, perm=[0, 2, 1, 3]) - out = layers.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) + out = tensor.transpose(out, perm=[0, 2, 1, 3]) + out = tensor.reshape(x=out, shape=[0, 0, out.shape[2] * out.shape[3]]) # project to output out = self.out_proj(out) @@ -429,7 +431,7 @@ class TransformerEncoderLayer(Layer): .. 
code-block:: python import paddle - from paddle import TransformerEncoderLayer + from paddle.nn import TransformerEncoderLayer # encoder input: [batch_size, src_len, d_model] enc_input = paddle.rand((2, 4, 128)) @@ -470,17 +472,14 @@ class TransformerEncoderLayer(Layer): bias_attr=bias_attrs[0]) self.linear1 = Linear( d_model, dim_feedforward, weight_attrs[1], bias_attr=bias_attrs[1]) - self.dropout = Dropout( - act_dropout, dropout_implementation="upscale_in_train") + self.dropout = Dropout(act_dropout, mode="upscale_in_train") self.linear2 = Linear( dim_feedforward, d_model, weight_attrs[1], bias_attr=bias_attrs[1]) self.norm1 = LayerNorm(d_model) self.norm2 = LayerNorm(d_model) - self.dropout1 = Dropout( - dropout, dropout_implementation="upscale_in_train") - self.dropout2 = Dropout( - dropout, dropout_implementation="upscale_in_train") - self.activation = getattr(layers, activation) + self.dropout1 = Dropout(dropout, mode="upscale_in_train") + self.dropout2 = Dropout(dropout, mode="upscale_in_train") + self.activation = getattr(F, activation) def forward(self, src, src_mask=None): """ @@ -539,7 +538,7 @@ class TransformerEncoder(Layer): .. code-block:: python import paddle - from paddle import TransformerEncoderLayer, TransformerEncoder + from paddle.nn import TransformerEncoderLayer, TransformerEncoder # encoder input: [batch_size, src_len, d_model] enc_input = paddle.rand((2, 4, 128)) @@ -643,7 +642,7 @@ class TransformerDecoderLayer(Layer): .. code-block:: python import paddle - from paddle import TransformerDecoderLayer + from paddle.nn import TransformerDecoderLayer # decoder input: [batch_size, tgt_len, d_model] dec_input = paddle.rand((2, 4, 128)) @@ -697,20 +696,16 @@ class TransformerDecoderLayer(Layer): bias_attr=bias_attrs[1]) self.linear1 = Linear( d_model, dim_feedforward, weight_attrs[2], bias_attr=bias_attrs[2]) - self.dropout = Dropout( - act_dropout, dropout_implementation="upscale_in_train") + self.dropout = Dropout(act_dropout, mode="upscale_in_train") self.linear2 = Linear( dim_feedforward, d_model, weight_attrs[2], bias_attr=bias_attrs[2]) self.norm1 = LayerNorm(d_model) self.norm2 = LayerNorm(d_model) self.norm3 = LayerNorm(d_model) - self.dropout1 = Dropout( - dropout, dropout_implementation="upscale_in_train") - self.dropout2 = Dropout( - dropout, dropout_implementation="upscale_in_train") - self.dropout3 = Dropout( - dropout, dropout_implementation="upscale_in_train") - self.activation = getattr(layers, activation) + self.dropout1 = Dropout(dropout, mode="upscale_in_train") + self.dropout2 = Dropout(dropout, mode="upscale_in_train") + self.dropout3 = Dropout(dropout, mode="upscale_in_train") + self.activation = getattr(F, activation) def forward(self, tgt, memory, tgt_mask=None, memory_mask=None, cache=None): """ @@ -834,7 +829,7 @@ class TransformerDecoder(Layer): .. code-block:: python import paddle - from paddle import TransformerDecoderLayer, TransformerDecoder + from paddle.nn import TransformerDecoderLayer, TransformerDecoder # decoder input: [batch_size, tgt_len, d_model] dec_input = paddle.rand((2, 4, 128)) @@ -1017,7 +1012,7 @@ class Transformer(Layer): .. 
code-block:: python import paddle - from paddle import Transformer + from paddle.nn import Transformer # src: [batch_size, tgt_len, d_model] enc_input = paddle.rand((2, 4, 128)) diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index 0da8053fe8a3495f5d3188a737638531347de648..3150b8c2d0363274dfb6fd3465110c89339cd4c9 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -45,8 +45,8 @@ class Adam(Optimizer): Related paper: `Adam: A Method for Stochastic Optimization `_ Args: - learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. - It can be a float value or a LearningRateDecay. The default value is 0.001. + learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``. + It can be a float value or a _LRScheduler. The default value is 0.001. beta1 (float|Tensor, optional): The exponential decay rate for the 1st moment estimates. It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.9. @@ -55,7 +55,7 @@ class Adam(Optimizer): The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ @@ -143,6 +143,12 @@ class Adam(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None + if not 0 <= beta1 < 1: + raise ValueError("Invaild value of beta1, expect beta1 in [0,1).") + if not 0 <= beta2 < 1: + raise ValueError("Invaild value of beta2, expect beta2 in [0,1).") + if not 0 <= epsilon: + raise ValueError("Invaild value of epsilon, expect epsilon >= 0.") super(Adam, self).__init__( learning_rate=learning_rate, parameters=parameters, diff --git a/python/paddle/optimizer/adamax.py b/python/paddle/optimizer/adamax.py index 73a78b17cbba55c1ee90a2708f6c163940158a51..cca120efd450768520d9cf027f6a36aaad121d9e 100644 --- a/python/paddle/optimizer/adamax.py +++ b/python/paddle/optimizer/adamax.py @@ -47,15 +47,15 @@ class Adamax(Optimizer): it is added here for numerical stability to prevent the division by 0 error. Args: - learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. - It can be a float value or a LearningRateDecay. The default value is 0.001. + learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``. + It can be a float value or a _LRScheduler. The default value is 0.001. beta1 (float, optional): The exponential decay rate for the 1st moment estimates. The default value is 0.9. beta2 (float, optional): The exponential decay rate for the 2nd moment estimates. The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. The default value is 1e-08. - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. 
weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \ @@ -118,6 +118,12 @@ class Adamax(Optimizer): assert beta1 is not None assert beta2 is not None assert epsilon is not None + if not 0 <= beta1 < 1: + raise ValueError("Invaild value of beta1, expect beta1 in [0,1).") + if not 0 <= beta2 < 1: + raise ValueError("Invaild value of beta2, expect beta2 in [0,1).") + if not 0 <= epsilon: + raise ValueError("Invaild value of epsilon, expect epsilon >= 0.") super(Adamax, self).__init__( learning_rate=learning_rate, parameters=parameters, diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index f498fcbffa24ec188b57ceb2d3c6884fc1e135d2..edaca7e8301676c8734eb3e60924844bea0121d9 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -19,112 +19,7 @@ import paddle __all__ = ['AdamW'] -class DecoupledWeightDecay(object): - def __init__(self, coeff=0.0, apply_decay_param_fun=None, **kwargs): - if not isinstance(coeff, float) and \ - not isinstance(coeff, framework.Variable): - raise TypeError("coeff should be float or Tensor.") - self._params_name = set() - self._apply_decay_param_fun = apply_decay_param_fun - self._coeff = coeff - super(DecoupledWeightDecay, self).__init__(**kwargs) - - def _scale_parameters(self, params_and_grads): - """ - Adds weight decay ops. - scaled_parameter = parameter * coeff - - Args: - params_and_grads: A list of (parameters, gradients) pairs, - the parameters need to decay. - Raises: - Exception: The type of coeff and parameter is not consistent. - """ - if isinstance(self._coeff, float) and self._coeff == 0.0: - return - - scaled_params = [] - for param, grad in params_and_grads: - # If no gradient then we don't need to do anything - if grad is None: - continue - if self._apply_decay_param_fun is not None \ - and not self._apply_decay_param_fun(param.name): - continue - - if isinstance(self._coeff, float): - assert param.dtype is not paddle.fluid.core.VarDesc.VarType.FP32, \ - "the type of coeff(float) and parameter(%s) is not consistent."%(self._coeff.dtype) - else: - assert self._coeff.dtype == param.dtype, \ - "the type of coeff(%s) and parameter(%s) is not consistent."%(self._coeff.dtype, param.dtype) - - with param.block.program._optimized_guard( - [param, grad]), framework.name_scope('weight decay'): - assert param.name not in self._params_name - scaled_params.append((param, grad, param * self._coeff)) - self._params_name.add(param.name) - return scaled_params - - def backward(self, **kargs): - return super(DecoupledWeightDecay, self).backward(**kargs) - - def _apply_optimize(self, **kargs): - return super(DecoupledWeightDecay, self)._apply_optimize(**kargs) - - def minimize(self, - loss, - startup_program=None, - parameters=None, - no_grad_set=None): - params_grads = self.backward( - loss=loss, - startup_program=startup_program, - parameters=parameters, - no_grad_set=no_grad_set) - scaled_params = self._scale_parameters(params_grads) - for p_grad_sgrad in scaled_params: - param, grad, scaled_param = p_grad_sgrad - with param.block.program._optimized_guard( - [param, grad]), framework.name_scope('weight decay'): - updated_param = paddle.fluid.layers.elementwise_sub( - x=param, y=scaled_param) - paddle.fluid.layers.assign(input=updated_param, output=param) - - optimize_ops = self._apply_optimize( - loss=loss, - params_grads=params_grads, - startup_program=startup_program) - return optimize_ops, params_grads - - @framework.dygraph_only - def step(self): - parameter_list 
= self._parameter_list - self._dtype = None - params_grads = [] - for param in self._parameter_list: - if not param.trainable: - continue - if param._grad_ivar() is not None: - grad_var = param._grad_ivar() - params_grads.append((param, grad_var)) - - scaled_params = self._scale_parameters(params_grads) - for p_grad_sgrad in scaled_params: - param, grad, scaled_param = p_grad_sgrad - with param.block.program._optimized_guard( - [param, grad]), framework.name_scope('weight decay'): - updated_param = paddle.fluid.layers.elementwise_sub( - x=param, y=scaled_param) - paddle.fluid.layers.assign(input=updated_param, output=param) - optimize_ops = self._apply_optimize( - loss=None, startup_program=None, params_grads=params_grads) - - def __str__(self): - return " ".join(["Weight Decay, params:", ",".join(self._params_name)]) - - -class AdamW(DecoupledWeightDecay, Adam): +class AdamW(Adam): """ The AdamW optimizer is implemented based on the AdamW Optimization in paper `DECOUPLED WEIGHT DECAY REGULARIZATION `_. @@ -145,8 +40,8 @@ class AdamW(DecoupledWeightDecay, Adam): Args: - learning_rate (float|LearningRateDecay, optional): The learning rate used to update ``Parameter``. - It can be a float value or a LearningRateDecay. The default value is 0.001. + learning_rate (float|_LRScheduler, optional): The learning rate used to update ``Parameter``. + It can be a float value or a _LRScheduler. The default value is 0.001. parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. @@ -157,9 +52,9 @@ class AdamW(DecoupledWeightDecay, Adam): It should be a float number or a Tensor with shape [1] and data type as float32. The default value is 0.999. epsilon (float, optional): A small float value for numerical stability. - weight_decay (float|Tensor): The weight decay coefficient, it can be float or Tensor. The default value is 0.0. The default value is 1e-08. - apply_decay_param_fun (function|None): If it is not None, + weight_decay (float|Tensor, optional): The weight decay coefficient, it can be float or Tensor. The default value is 0.01. + apply_decay_param_fun (function|None, optional): If it is not None, only tensors that makes apply_decay_param_fun(Tensor)==True will be updated. It only works when we want to specify tensors. Default: None. 
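The ``weight_decay`` and ``apply_decay_param_fun`` arguments described above control the decoupled decay; a minimal dygraph sketch (the parameter-name filter below assumes Paddle's default ``*.w_0`` weight naming and is only illustrative):

.. code-block:: python

    import numpy as np
    import paddle

    paddle.disable_static()
    x = paddle.to_tensor(np.random.uniform(-1, 1, (10, 10)).astype('float32'))
    linear = paddle.nn.Linear(10, 10)
    loss = paddle.mean(linear(x))

    adamw = paddle.optimizer.AdamW(
        learning_rate=0.1,
        parameters=linear.parameters(),
        weight_decay=0.01,
        # decay only parameters whose names pass this (illustrative) filter
        apply_decay_param_fun=lambda name: "w_0" in name)

    loss.backward()
    adamw.step()
    adamw.clear_grad()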
@@ -208,26 +103,129 @@ class AdamW(DecoupledWeightDecay, Adam): def __init__(self, learning_rate=0.001, - parameters=None, beta1=0.9, beta2=0.999, epsilon=1e-8, - weight_decay=0.0, + parameters=None, + weight_decay=0.01, apply_decay_param_fun=None, grad_clip=None, name=None, lazy_mode=False): - args_dict = { - "learning_rate": learning_rate, - "parameters": parameters, - "beta1": beta1, - "beta2": beta2, - "epsilon": epsilon, - "grad_clip": grad_clip, - "name": name, - "lazy_mode": lazy_mode - } + assert learning_rate is not None + assert beta1 is not None + assert beta2 is not None + assert epsilon is not None + if not 0 <= beta1 < 1: + raise ValueError("Invaild value of beta1, expect beta1 in [0,1).") + if not 0 <= beta2 < 1: + raise ValueError("Invaild value of beta2, expect beta2 in [0,1).") + if not 0 <= epsilon: + raise ValueError("Invaild value of epsilon, expect epsilon >= 0.") + coeff = weight_decay + if not isinstance(coeff, float) and \ + not isinstance(coeff, framework.Variable): + raise TypeError("coeff should be float or Tensor.") + self._params_name = set() + self._apply_decay_param_fun = apply_decay_param_fun + self._coeff = coeff super(AdamW, self).__init__( - weight_decay, - apply_decay_param_fun=apply_decay_param_fun, - **args_dict) + learning_rate=learning_rate, + parameters=parameters, + beta1=beta1, + beta2=beta2, + epsilon=epsilon, + grad_clip=grad_clip, + name=name, + lazy_mode=lazy_mode) + + def _scale_parameters(self, params_and_grads): + """ + Adds weight decay ops. + scaled_parameter = parameter * coeff + + Args: + params_and_grads: A list of (parameters, gradients) pairs, + the parameters need to decay. + Raises: + Exception: The type of coeff and parameter is not consistent. + """ + + scaled_params = [] + for param, grad in params_and_grads: + # If no gradient then we don't need to do anything + if grad is None: + continue + if self._apply_decay_param_fun is not None \ + and not self._apply_decay_param_fun(param.name): + continue + + if isinstance(self._coeff, float): + assert param.dtype is not paddle.fluid.core.VarDesc.VarType.FP32, \ + "the type of coeff(float) and parameter(%s) is not consistent."%(self._coeff.dtype) + else: + assert self._coeff.dtype == param.dtype, \ + "the type of coeff(%s) and parameter(%s) is not consistent."%(self._coeff.dtype, param.dtype) + if isinstance(self._learning_rate, float): + learning_rate = self._learning_rate + else: + self._learning_rate() + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + if param.name not in self._params_name: + scaled_params.append( + (param, grad, param * self._coeff * learning_rate)) + self._params_name.add(param.name) + param = param * self._coeff + return scaled_params + + def minimize(self, + loss, + startup_program=None, + parameters=None, + no_grad_set=None): + params_grads = self.backward( + loss=loss, + startup_program=startup_program, + parameters=parameters, + no_grad_set=no_grad_set) + scaled_params = self._scale_parameters(params_grads) + for p_grad_sgrad in scaled_params: + param, grad, scaled_param = p_grad_sgrad + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + updated_param = paddle.fluid.layers.elementwise_sub( + x=param, y=scaled_param) + paddle.fluid.layers.assign(input=updated_param, output=param) + + optimize_ops = self._apply_optimize( + loss=loss, + params_grads=params_grads, + startup_program=startup_program) + return optimize_ops, params_grads + + @framework.dygraph_only 
+ def step(self): + parameter_list = self._parameter_list + self._dtype = None + params_grads = [] + for param in self._parameter_list: + if not param.trainable: + continue + if param._grad_ivar() is not None: + grad_var = param._grad_ivar() + params_grads.append((param, grad_var)) + + scaled_params = self._scale_parameters(params_grads) + for p_grad_sgrad in scaled_params: + param, grad, scaled_param = p_grad_sgrad + with param.block.program._optimized_guard( + [param, grad]), framework.name_scope('weight decay'): + updated_param = paddle.fluid.layers.elementwise_sub( + x=param, y=scaled_param) + param.set_value(updated_param.numpy()) + optimize_ops = self._apply_optimize( + loss=None, startup_program=None, params_grads=params_grads) + + def __str__(self): + return " ".join(["Weight Decay, params:", ",".join(self._params_name)]) diff --git a/python/paddle/optimizer/lr_scheduler.py b/python/paddle/optimizer/lr_scheduler.py index 4ecaffb8fa509bdc54067bb25f8d1b5191b7ac1b..61391704061bda7dfbad7252cbc04c0b7d6492a4 100644 --- a/python/paddle/optimizer/lr_scheduler.py +++ b/python/paddle/optimizer/lr_scheduler.py @@ -109,7 +109,7 @@ class _LRScheduler(object): """ self.keys = ['last_epoch', 'last_lr'] - def set_dict(self, state_dict): + def set_state_dict(self, state_dict): """ Loads the schedulers state. """ @@ -126,8 +126,8 @@ class _LRScheduler(object): "There are some unused values in state_dict. Maybe the optimizer have different 'LearningRateDecay' when invoking state_dict and set_dict" ) - # alias for set_dict - set_state_dict = set_dict + # alias for set_state_dict + set_dict = set_state_dict def get_lr(self): # calculate by python float diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index 3f9de0cefc05d1aaee36fa3af5cfa9ae4affcb97..1bd9a1f144ed4b5c69d76070eadc317e2063e25b 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -80,7 +80,6 @@ class Optimizer(object): .. code-block:: python #Take the subclass adam as an example - #Optimizer import paddle import numpy as np @@ -98,7 +97,7 @@ class Optimizer(object): """ - @imperative_base.no_grad() + @imperative_base.no_grad def __init__(self, learning_rate, parameters=None, @@ -170,7 +169,7 @@ class Optimizer(object): import paddle paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) adam = paddle.optimizer.Adam(0.001, parameters=emb.parameters()) state_dict = adam.state_dict() @@ -200,7 +199,7 @@ class Optimizer(object): import paddle paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) state_dict = emb.state_dict() paddle.framework.save(state_dict, "paddle_dy") @@ -215,6 +214,8 @@ class Optimizer(object): adam.set_state_dict(opti_state_dict) ''' + if isinstance(self._learning_rate, _LRScheduler): + self._learning_rate.set_dict(state_dict["LR_Scheduler"]) if isinstance(self._learning_rate, _LRScheduler): self._learning_rate.set_state_dict(state_dict["LR_Scheduler"]) @@ -270,6 +271,7 @@ class Optimizer(object): main_prog = framework.default_main_program() main_prog.lr_sheduler = self._learning_rate main_prog.lr_var = lr_var + self._learning_rate_map[framework.default_main_program( )] = lr_var @@ -300,7 +302,7 @@ class Optimizer(object): this API cannot be invoked, because it will lead to conflict. 
Args: - value (float|Tensor): the value of learning rate + value (float): the value of learning rate Returns: None @@ -358,6 +360,7 @@ class Optimizer(object): Get current step learning rate. The return value is all the same When _LRScheduler is not used, otherwise return the current step learning rate. + Returns: float: The learning rate of the current step. @@ -368,7 +371,7 @@ class Optimizer(object): import paddle # example1: _LRScheduler is not used, return value is all the same paddle.disable_static() - emb = paddle.nn.Embedding([10, 10]) + emb = paddle.nn.Embedding(10, 10) adam = paddle.optimizer.Adam(0.001, parameters = emb.parameters()) lr = adam.get_lr() print(lr) # 0.001 @@ -655,7 +658,7 @@ class Optimizer(object): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5, dtype="float32") + linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. adam = paddle.optimizer.Adam(learning_rate = 0.01, parameters = linear.parameters()) @@ -798,7 +801,7 @@ class Optimizer(object): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5, dtype="float32") + linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. adam = paddle.optimizer.Adam(learning_rate = 0.01, parameters = linear.parameters()) @@ -812,7 +815,7 @@ class Optimizer(object): if p.trainable: p.clear_gradient() - @imperative_base.no_grad() + @imperative_base.no_grad def minimize(self, loss, startup_program=None, @@ -836,36 +839,33 @@ class Optimizer(object): tuple: tuple (optimize_ops, params_grads), A list of operators appended by minimize and a list of (param, grad) tensor pairs, param is ``Parameter``, grad is the gradient value corresponding to the parameter. - The returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to + In static graph mode, the returned tuple can be passed to ``fetch_list`` in ``Executor.run()`` to indicate program pruning. If so, the program will be pruned by ``feed`` and ``fetch_list`` before run, see details in ``Executor``. Examples: .. 
code-block:: python - + import paddle - import paddle.fluid as fluid - - place = fluid.CPUPlace() - main = fluid.Program() - with fluid.program_guard(main): - x = fluid.data(name='x', shape=[None, 13], dtype='float32') - y = fluid.data(name='y', shape=[None, 1], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) - cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(cost) - - adam_optimizer = paddle.optimizer.Adam(0.01) - adam_optimizer.minimize(avg_cost) - - fetch_list = [avg_cost] - train_reader = paddle.batch( - paddle.dataset.uci_housing.train(), batch_size=1) - feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) - exe = fluid.Executor(place) - exe.run(fluid.default_startup_program()) - for data in train_reader(): - exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list) + import numpy as np + + paddle.disable_static() + inp = np.random.uniform(-0.1, 0.1, [10, 10]).astype("float32") + linear = paddle.nn.Linear(10, 10) + inp = paddle.to_tensor(inp) + out = linear(inp) + loss = paddle.mean(out) + + beta1 = paddle.to_tensor([0.9], dtype="float32") + beta2 = paddle.to_tensor([0.99], dtype="float32") + + adam = paddle.optimizer.Adam(learning_rate=0.1, + parameters=linear.parameters(), + weight_decay=0.01) + out.backward() + adam.minimize(loss) + adam.clear_grad() + """ assert isinstance(loss, Variable), "The loss should be an Tensor." @@ -885,7 +885,7 @@ class Optimizer(object): @framework.dygraph_only def step(self): """ - Execute the optimizer once. + Execute the optimizer and update parameters once. Returns: None @@ -898,7 +898,7 @@ class Optimizer(object): paddle.disable_static() value = np.arange(26).reshape(2, 13).astype("float32") a = paddle.to_tensor(value) - linear = paddle.nn.Linear(13, 5, dtype="float32") + linear = paddle.nn.Linear(13, 5) # This can be any optimizer supported by dygraph. adam = paddle.optimizer.Adam(learning_rate = 0.01, parameters = linear.parameters()) diff --git a/python/paddle/optimizer/rmsprop.py b/python/paddle/optimizer/rmsprop.py index 0bc4c9bfd53dc15449f03d6de6c8942e977bf562..2609972d85ccdc2a867765431fefe21b9ba2de16 100644 --- a/python/paddle/optimizer/rmsprop.py +++ b/python/paddle/optimizer/rmsprop.py @@ -69,8 +69,8 @@ class RMSProp(Optimizer): Parameters: - learning_rate (float|LearningRateDecay): The learning rate used to update ``Parameter``. - It can be a float value or a LearningRateDecay. + learning_rate (float|_LRScheduler): The learning rate used to update ``Parameter``. + It can be a float value or a _LRScheduler. rho(float): rho is :math: `\\rho` in equation, default is 0.95. epsilon(float): :math: `\\epsilon` in equation is smoothing term to avoid division by zero, default is 1e-6. @@ -80,7 +80,7 @@ class RMSProp(Optimizer): the gradient; if False, by the uncentered second moment. Setting this to True may help with training, but is slightly more expensive in terms of computation and memory. Defaults to False. - parameters (list, optional): List of ``Tensor`` names to update to minimize ``loss``. \ + parameters (list, optional): List of ``Tensor`` to update to minimize ``loss``. \ This parameter is required in dygraph mode. \ The default value is None in static mode, at this time all parameters will be updated. weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. 
\ @@ -147,6 +147,12 @@ class RMSProp(Optimizer): raise ValueError("epsilon is not set.") if momentum is None: raise ValueError("momentum is not set.") + if not 0.0 <= epsilon: + raise ValueError("Invalid value of epsilon, expect epsilon >= 0.") + if not 0.0 <= momentum: + raise ValueError("Invalid value of momentum, expect momentum >= 0.") + if not 0.0 <= rho: + raise ValueError("Invalid value of rho, expect rho >= 0.") super(RMSProp, self).__init__( learning_rate=learning_rate, diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index bb3a578e15724e9501d69dc209bdedc65afeb82b..133c3dfb24fed82e4d666321585932d7e58a6f29 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -85,7 +85,7 @@ class SGD(Optimizer): name=name) self.type = "sgd" - @no_grad() + @no_grad def _append_optimize_op(self, block, param_and_grad): lr = self._create_param_lr(param_and_grad) if framework.in_dygraph_mode(): diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 9ef66712540aa54eac39b7e6160c5c91b6e3fcd5..9eece1240d7d3c0b8a863091367e993047bd4527 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -73,8 +73,8 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): Can be a scalar, list, tuple, numpy\.ndarray, paddle\.Tensor, paddle\.ComplexTensor. dtype(str|np.dtype, optional): The desired data type of returned tensor. Can be 'bool' , 'float16' , 'float32' , 'float64' , 'int8' , 'int16' , 'int32' , 'int64' , 'uint8'. And - 'complex64' , 'complex128' only for ComplexTensor. Default: None, for float point number, - get type from ``get_default_type``, for other type, infers from ``data`` . + 'complex64' , 'complex128' only for ComplexTensor. Default: None, infers dtype from ``data`` + except for python float number which gets dtype from ``get_default_type`` . place(CPUPlace|CUDAPinnedPlace|CUDAPlace, optional): The place to allocate Tensor. Can be CPUPlace, CUDAPinnedPlace, CUDAPlace. Default: None, means global place. stop_gradient(bool, optional): Whether to block the gradient propagation of Autograd. Default: True. @@ -188,13 +188,21 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): raise TypeError( "Can't constructs a 'paddle.Tensor' with data type {}, data type must be scalar|list|tuple|numpy.ndarray|paddle.Tensor|paddle.ComplexTensor". 
format(type(data))) + if not dtype and data.dtype in [ + 'float16', 'float32', 'float64', 'complex64', 'complex128' + ]: + default_type = paddle.get_default_dtype() + if np.iscomplexobj(data): + default_type = 'complex64' if default_type in [ + 'float16', 'float32' + ] else 'complex128' + data = data.astype(default_type) + + if dtype and convert_dtype(dtype) != data.dtype: + data = data.astype(dtype) if not np.iscomplexobj(data): - if dtype: - dtype = convert_dtype(dtype) - elif data.dtype in ['float16', 'float32', 'float64']: - dtype = paddle.framework.get_default_dtype() - if dtype and dtype != data.dtype: + if dtype and convert_dtype(dtype) != data.dtype: data = data.astype(dtype) return paddle.Tensor( value=data, @@ -203,14 +211,6 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): zero_copy=True, stop_gradient=stop_gradient) else: - if dtype: - dtype = convert_dtype(dtype) - else: - dtype = paddle.framework.get_default_dtype() - dtype = 'complex64' if dtype in ['float16', 'float32' - ] else 'complex128' - if dtype != data.dtype: - data = data.astype(dtype) name = unique_name.generate('generated_tensor') real_tensor = paddle.Tensor( value=data.real, @@ -244,10 +244,6 @@ def full_like(x, fill_value, dtype=None, name=None): Returns: Tensor: Tensor which is created according to ``x``, ``fill_value`` and ``dtype``. - Raises: - TypeError: The data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: The ``dtype`` must be one of bool, float16, float32, float64, int32, int64 and None. - Examples: .. code-block:: python @@ -303,11 +299,6 @@ def ones(shape, dtype=None, name=None): Returns: Tensor: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 1. - Raises: - TypeError: The ``dtype`` must be one of bool, float16, float32, float64, int32, int64 and None. - TypeError: The ``shape`` must be one of list, tuple and Tensor. The data type of ``shape`` must - be int32 or int64 when it's a Tensor. - Examples: .. code-block:: python @@ -366,11 +357,10 @@ def ones_like(x, dtype=None, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([1,2,3], dtype='float32')) + x = paddle.to_tensor([1,2,3]) out1 = paddle.zeros_like(x) # [1., 1., 1.] out2 = paddle.zeros_like(x, dtype='int32') # [1, 1, 1] @@ -392,11 +382,6 @@ def zeros(shape, dtype=None, name=None): Returns: Tensor: A tensor of data type :attr:`dtype` with shape :attr:`shape` and all elements set to 0. - Raises: - TypeError: The ``dtype`` must be one of bool, float16, float32, float64, int32, int64 and None. - TypeError: The ``shape`` must be one of list, tuple and Tensor. The data type of ``shape`` must - be int32 or int64 when it's a Tensor. - Examples: .. code-block:: python @@ -453,11 +438,10 @@ def zeros_like(x, dtype=None, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x = paddle.to_tensor(np.array([1,2,3], dtype='float32')) + x = paddle.to_tensor([1,2,3]) out1 = paddle.zeros_like(x) # [0., 0., 0.] out2 = paddle.zeros_like(x, dtype='int32') # [0, 0, 0] @@ -482,10 +466,6 @@ def eye(num_rows, num_columns=None, dtype=None, name=None): Returns: Tensor: An identity Tensor or LoDTensor of shape [num_rows, num_columns]. - - Raises: - TypeError: The ``dtype`` must be one of float16, float32, float64, int32 int64 and None. - TypeError: The ``num_columns`` must be non-negative int. Examples: .. 
code-block:: python @@ -534,11 +514,6 @@ def full(shape, fill_value, dtype=None, name=None): Returns: Tensor: Tensor which is created according to ``shape``, ``fill_value`` and ``dtype``. - Raises: - TypeError: The ``dtype`` must be one of None, bool, float16, float32, float64, int32 and int64. - TypeError: The ``shape`` must be one of Tensor, list and tuple. The data type of ``shape`` must - be int32 or int64 when the it's a Tensor - Examples: .. code-block:: python @@ -619,7 +594,6 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() @@ -633,7 +607,7 @@ def arange(start=0, end=None, step=1, dtype=None, name=None): out3 = paddle.arange(4.999, dtype='float32') # [0., 1., 2., 3., 4.] - start_var = paddle.to_tensor(np.array([3])) + start_var = paddle.to_tensor([3]) out4 = paddle.arange(start_var, 7) # [3, 4, 5, 6] @@ -725,7 +699,7 @@ def tril(x, diagonal=0, name=None): paddle.disable_static() - x = paddle.to_variable(data) + x = paddle.to_tensor(data) tril1 = paddle.tensor.tril(x) # array([[ 1, 0, 0, 0], @@ -797,7 +771,7 @@ def triu(x, diagonal=0, name=None): paddle.disable_static() # example 1, default diagonal - x = paddle.to_variable(data) + x = paddle.to_tensor(data) triu1 = paddle.tensor.triu(x) # array([[ 1, 2, 3, 4], # [ 0, 6, 7, 8], diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index b5b528325cd9f52a8b61ef21df0095c41da5a8ed..7ddda5091a0a260f56b29bcedfdcb0786e82ddd6 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -810,7 +810,7 @@ def cholesky(x, upper=False, name=None): a = np.random.rand(3, 3) a_t = np.transpose(a, [1, 0]) x_data = np.matmul(a, a_t) + 1e-03 - x = paddle.to_variable(x_data) + x = paddle.to_tensor(x_data) out = paddle.cholesky(x, upper=False) print(out.numpy()) # [[1.190523 0. 0. ] @@ -855,15 +855,16 @@ def bmm(x, y, name=None): Examples: import paddle - # In imperative mode: - # size input1: (2, 2, 3) and input2: (2, 3, 2) - input1 = np.array([[[1.0, 1.0, 1.0],[2.0, 2.0, 2.0]],[[3.0, 3.0, 3.0],[4.0, 4.0, 4.0]]]) - input2 = np.array([[[1.0, 1.0],[2.0, 2.0],[3.0, 3.0]],[[4.0, 4.0],[5.0, 5.0],[6.0, 6.0]]]) - paddle.disable_static() - - x = paddle.to_variable(input1) - y = paddle.to_variable(input2) + + # In imperative mode: + # size x: (2, 2, 3) and y: (2, 3, 2) + x = paddle.to_tensor([[[1.0, 1.0, 1.0], + [2.0, 2.0, 2.0]], + [[3.0, 3.0, 3.0], + [4.0, 4.0, 4.0]]]) + y = paddle.to_tensor([[[1.0, 1.0],[2.0, 2.0],[3.0, 3.0]], + [[4.0, 4.0],[5.0, 5.0],[6.0, 6.0]]]) out = paddle.bmm(x, y) #output size: (2, 2, 2) #output value: @@ -924,10 +925,8 @@ def histogram(input, bins=100, min=0, max=0): Code Example 2: .. code-block:: python import paddle - import numpy as np paddle.disable_static(paddle.CPUPlace()) - inputs_np = np.array([1, 2, 1]).astype(np.float) - inputs = paddle.to_variable(inputs_np) + inputs = paddle.to_tensor([1, 2, 1]) result = paddle.histogram(inputs, bins=4, min=0, max=3) print(result) # [0, 2, 1, 0] paddle.enable_static() diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py index 36b558d597c1ce1333a8f1eec54e2fd2813625e3..5fd714421c8ed14820738543a1824c779296d7c3 100644 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -71,13 +71,12 @@ def equal_all(x, y, name=None): Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 2, 3])) - z = paddle.to_variable(np.array([1, 4, 3])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 2, 3]) + z = paddle.to_tensor([1, 4, 3]) result1 = paddle.equal_all(x, y) print(result1.numpy()) # result1 = [True ] result2 = paddle.equal_all(x, z) @@ -120,14 +119,11 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([10000., 1e-07]).astype("float32") - np_y = np.array([10000.1, 1e-08]).astype("float32") - x = paddle.to_tensor(np_x) - y = paddle.to_tensor(np_y) + x = paddle.to_tensor([10000., 1e-07]) + y = paddle.to_tensor([10000.1, 1e-08]) result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") np_result1 = result1.numpy() @@ -137,10 +133,8 @@ def allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name=None): np_result2 = result2.numpy() # [False] - np_x = np.array([1.0, float('nan')]).astype("float32") - np_y = np.array([1.0, float('nan')]).astype("float32") - x = paddle.to_tensor(np_x) - y = paddle.to_tensor(np_y) + x = paddle.to_tensor([1.0, float('nan')]) + y = paddle.to_tensor([1.0, float('nan')]) result1 = paddle.allclose(x, y, rtol=1e-05, atol=1e-08, equal_nan=False, name="ignore_nan") np_result1 = result1.numpy() @@ -195,12 +189,11 @@ def equal(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.equal(x, y) print(result1.numpy()) # result1 = [True False False] """ @@ -227,12 +220,11 @@ def greater_equal(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.greater_equal(x, y) print(result1.numpy()) # result1 = [True False True] """ @@ -259,12 +251,11 @@ def greater_than(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.greater_than(x, y) print(result1.numpy()) # result1 = [False False True] """ @@ -292,12 +283,11 @@ def less_equal(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.less_equal(x, y) print(result1.numpy()) # result1 = [True True False] """ @@ -325,12 +315,11 @@ def less_than(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.less_than(x, y) print(result1.numpy()) # result1 = [False True False] """ @@ -358,12 +347,12 @@ def not_equal(x, y, name=None): Examples: .. 
code-block:: python - import numpy as np + import paddle paddle.disable_static() - x = paddle.to_variable(np.array([1, 2, 3])) - y = paddle.to_variable(np.array([1, 3, 2])) + x = paddle.to_tensor([1, 2, 3]) + y = paddle.to_tensor([1, 3, 2]) result1 = paddle.not_equal(x, y) print(result1.numpy()) # result1 = [False True True] """ diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 845d2cf4d199328bbf8d0e03cd3a7a24a61aafd2..363c3ffceb85ef6168dc8c33b81185cac08083fb 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -85,11 +85,6 @@ def concat(x, axis=0, name=None): name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. - Raises: - TypeError: ``x`` must be list or tuple. - TypeError: The data type of ``x`` must be one of bool, float16, float32, float64, int32 and int64. - TypeError: The ``axis`` must be int or Tensor. The dtype of ``axis`` must be int32 or int64 when it's a Tensor. - TypeError: All the Tensors in ``x`` must have the same data type. Returns: Tensor: A Tensor with the same data type as ``x``. @@ -98,18 +93,14 @@ def concat(x, axis=0, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() # Now we are in imperative mode - in1 = np.array([[1, 2, 3], - [4, 5, 6]]) - in2 = np.array([[11, 12, 13], - [14, 15, 16]]) - in3 = np.array([[21, 22], - [23, 24]]) - x1 = paddle.to_tensor(in1) - x2 = paddle.to_tensor(in2) - x3 = paddle.to_tensor(in3) + x1 = paddle.to_tensor([[1, 2, 3], + [4, 5, 6]]) + x2 = paddle.to_tensor([[11, 12, 13], + [14, 15, 16]]) + x3 = paddle.to_tensor([[21, 22], + [23, 24]]) zero = paddle.full(shape=[1], dtype='int32', fill_value=0) # When the axis is negative, the real axis is (axis + Rank(x)) # As follow, axis is -1, Rank(x) is 2, the real axis is 1 @@ -158,7 +149,7 @@ def flip(x, axis, name=None): image_shape=(3, 2, 2) x = np.arange(image_shape[0] * image_shape[1] * image_shape[2]).reshape(image_shape) x = x.astype('float32') - img = paddle.to_variable(x) + img = paddle.to_tensor(x) out = paddle.flip(img, [0,1]) print(out) # [[[10,11][8, 9]],[[6, 7],[4, 5]] [[2, 3],[0, 1]]] @@ -250,7 +241,7 @@ def flatten(x, start_axis=0, stop_axis=-1, name=None): x = np.arange(image_shape[0] * image_shape[1] * image_shape[2] * image_shape[3]).reshape(image_shape) / 100. x = x.astype('float32') - img = paddle.to_variable(x) + img = paddle.to_tensor(x) out = paddle.flatten(img, start_axis=1, stop_axis=2) # out shape is [2, 12, 4] """ @@ -315,15 +306,13 @@ def roll(x, shifts, axis=None, name=None): Examples: .. code-block:: python - import numpy as np import paddle import paddle.fluid as fluid - data = np.array([[1.0, 2.0, 3.0], - [4.0, 5.0, 6.0], - [7.0, 8.0, 9.0]]) paddle.disable_static() - x = paddle.to_variable(data) + x = paddle.to_tensor([[1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0]]) out_z1 = paddle.roll(x, shifts=1) print(out_z1.numpy()) #[[9. 1. 2.] @@ -433,8 +422,7 @@ def stack(x, axis=0, name=None): [5.0, 6.0] ] ] Args: - x (Tensor|list[Tensor]): Input ``x`` can be a single tensor, or a ``list`` of tensors. - If ``x`` is a ``list``, the Tensors in ``x`` + x (list[Tensor]|tuple[Tensor]): Input ``x`` can be a ``list`` or ``tuple`` of tensors, the Tensors in ``x`` must be of the same shape and dtype. Supported data types: float32, float64, int32, int64. axis (int, optional): The axis along which all inputs are stacked. 
``axis`` range is ``[-(R+1), R+1)``, where ``R`` is the number of dimensions of the first input tensor ``x[0]``. @@ -448,17 +436,11 @@ def stack(x, axis=0, name=None): .. code-block:: python import paddle - import numpy as np - - data1 = np.array([[1.0, 2.0]]) - data2 = np.array([[3.0, 4.0]]) - data3 = np.array([[5.0, 6.0]]) - + paddle.disable_static() - x1 = paddle.to_variable(data1) - x2 = paddle.to_variable(data2) - x3 = paddle.to_variable(data3) - + x1 = paddle.to_tensor([[1.0, 2.0]]) + x2 = paddle.to_tensor([[3.0, 4.0]]) + x3 = paddle.to_tensor([[5.0, 6.0]]) out = paddle.stack([x1, x2, x3], axis=0) print(out.shape) # [3, 1, 2] print(out.numpy()) @@ -487,10 +469,7 @@ def split(x, num_or_sections, axis=0, name=None): For more information, please refer to :ref:`api_guide_Name` . Returns: list(Tensor): The list of segmented Tensors. - Raises: - TypeError: The data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: ``num_or_sections`` is not int, list or tuple. - TypeError: ``axis`` is not int or Tensor. the data type of ``axis`` must be int32 or int64 when it's a Tensor. + Example: .. code-block:: python @@ -638,12 +617,10 @@ def unique(x, Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - x_data = np.array([2, 3, 3, 1, 5, 3]) - x = paddle.to_tensor(x_data) + x = paddle.to_tensor([2, 3, 3, 1, 5, 3]) unique = paddle.unique(x) np_unique = unique.numpy() # [1 2 3 5] _, indices, inverse, counts = paddle.unique(x, return_index=True, return_inverse=True, return_counts=True) @@ -651,8 +628,7 @@ def unique(x, np_inverse = inverse.numpy() # [1 2 2 0 3 2] np_counts = counts.numpy() # [1 1 3 1] - x_data = np.array([[2, 1, 3], [3, 0, 1], [2, 1, 3]]) - x = paddle.to_tensor(x_data) + x = paddle.to_tensor([[2, 1, 3], [3, 0, 1], [2, 1, 3]]) unique = paddle.unique(x) np_unique = unique.numpy() # [0 1 2 3] @@ -812,23 +788,15 @@ def gather(x, index, axis=None, name=None): Returns: output (Tensor): The output is a tensor with the same rank as ``x``. - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must to be one of float16, float32, float64, int32, int64, uint8. - TypeError: ``index`` must be a Tensor and the data type of ``index`` must be int32 or int64. - TypeError: ``axis`` must be a Tensor or int and the data type of ``index`` must be int32 or int64 when it's a Tensor. - Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - input_1 = np.array([[1,2],[3,4],[5,6]]) - index_1 = np.array([0,1]) - input = paddle.to_tensor(input_1) - index = paddle.to_tensor(index_1) + input = paddle.to_tensor([[1,2],[3,4],[5,6]]) + index = paddle.to_tensor([0,1]) output = paddle.gather(input, index, axis=0) # expected output: [[1,2],[3,4]] """ @@ -964,16 +932,11 @@ def scatter(x, index, updates, overwrite=True, name=None): .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() - x_data = np.array([[1, 1], [2, 2], [3, 3]]).astype(np.float32) - index_data = np.array([2, 1, 0, 1]).astype(np.int64) - updates_data = np.array([[1, 1], [2, 2], [3, 3], [4, 4]]).astype(np.float32) - - x = paddle.to_tensor(x_data) - index = paddle.to_tensor(index_data) - updates = paddle.to_tensor(updates_data) + x = paddle.to_tensor([[1, 1], [2, 2], [3, 3]], dtype='float32') + index = paddle.to_tensor([2, 1, 0, 1], dtype='int64') + updates = paddle.to_tensor([[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32') output1 = paddle.scatter(x, index, updates, overwrite=False) # [[3., 3.], @@ -1026,10 +989,7 @@ def chunk(x, chunks, axis=0, name=None): For more information, please refer to :ref:`api_guide_Name` . Returns: list(Tensor): The list of segmented Tensors. - Raises: - TypeError: The data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. - TypeError: ``chunks`` is not int. - TypeError: ``axis`` is not int or Tensor. the data type of ``axis`` must be int32 or int64 when it's a Tensor. + Example: .. code-block:: python @@ -1080,11 +1040,9 @@ def tile(x, repeat_times, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_data = np.array([1, 2, 3]).astype('int32') - data = paddle.to_tensor(np_data) + data = paddle.to_tensor([1, 2, 3], dtype='int32') out = paddle.tile(data, repeat_times=[2, 1]) np_out = out.numpy() # [[1, 2, 3], [1, 2, 3]] @@ -1093,8 +1051,7 @@ def tile(x, repeat_times, name=None): np_out = out.numpy() # [[1, 2, 3, 1, 2, 3], [1, 2, 3, 1, 2, 3]] - np_repeat_times = np.array([2, 1]).astype("int32") - repeat_times = paddle.to_tensor(np_repeat_times) + repeat_times = paddle.to_tensor([2, 1], dtype='int32') out = paddle.tile(data, repeat_times=repeat_times) np_out = out.numpy() # [[1, 2, 3], [1, 2, 3]] @@ -1162,15 +1119,12 @@ def expand_as(x, y, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - np_data_x = np.array([1, 2, 3]).astype('int32') - np_data_y = np.array([[1, 2, 3], [4, 5, 6]]).astype('int32') - data_x = paddle.to_tensor(np_data_x) - data_y = paddle.to_tensor(np_data_y) + data_x = paddle.to_tensor([1, 2, 3], 'int32') + data_y = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], 'int32') out = paddle.expand_as(data_x, data_y) np_out = out.numpy() # [[1, 2, 3], [1, 2, 3]] @@ -1218,12 +1172,10 @@ def expand(x, shape, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - np_data = np.array([1, 2, 3]).astype('int32') - data = paddle.to_tensor(np_data) + data = paddle.to_tensor([1, 2, 3], dtype='int32') out = paddle.expand(data, shape=[2, 3]) out = out.numpy() # [[1, 2, 3], [1, 2, 3]] @@ -1322,11 +1274,6 @@ def reshape(x, shape, name=None): Returns: Tensor: A reshaped Tensor with the same data type as ``x``. - Raises: - ValueError: If more than one elements of ``shape`` is -1. - ValueError: If the element of ``shape`` is 0, the corresponding dimension should be less than or equal to the dimension of ``x``. - ValueError: If the elements in ``shape`` is negative except -1. - Examples: .. code-block:: python @@ -1413,23 +1360,16 @@ def gather_nd(x, index, name=None): Returns: output (Tensor): A tensor with the shape index.shape[:-1] + input.shape[index.shape[-1]:] - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of float32, float64, int32 and int64. 
- TypeError: ``index`` must be a Tensor and the data type of ``index`` must be one of int32 and int64. - Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([[[1, 2], [3, 4], [5, 6]], - [[7, 8], [9, 10], [11, 12]]]) - np_index = [[0, 1]] - x = paddle.to_tensor(np_x) - index = paddle.to_tensor(np_index) + x = paddle.to_tensor([[[1, 2], [3, 4], [5, 6]], + [[7, 8], [9, 10], [11, 12]]]) + index = paddle.to_tensor([[0, 1]]) output = paddle.gather_nd(x, index) #[[3, 4]] diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index d2db2a7cb71945e137e46d6793f8cba1f7adf12f..ed2bbe03a366054dfe7d798310c7fa5d419b44a8 100755 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -174,14 +174,12 @@ def pow(x, y, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() # example 1: y is a float - x_data = np.array([1, 2, 3]) + x = paddle.to_tensor([1, 2, 3]) y = 2 - x = paddle.to_tensor(x_data) res = paddle.pow(x, y) print(res.numpy()) # [1 4 9] @@ -291,13 +289,10 @@ Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') - x = paddle.to_variable(np_x) - y = paddle.to_variable(np_y) + x = paddle.to_tensor([2, 3, 4], 'float64') + y = paddle.to_tensor([1, 5, 2], 'float64') z = paddle.add(x, y) np_z = z.numpy() print(np_z) # [3., 8., 6. ] @@ -335,14 +330,11 @@ def divide(x, y, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([2, 3, 4]).astype('float64') - np_y = np.array([1, 5, 2]).astype('float64') - x = paddle.to_tensor(np_x) - y = paddle.to_tensor(np_y) + x = paddle.to_tensor([2, 3, 4], dtype='float64') + y = paddle.to_tensor([1, 5, 2], dtype='float64') z = paddle.divide(x, y) print(z.numpy()) # [2., 0.6, 2.] @@ -440,14 +432,11 @@ def floor_divide(x, y, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([2, 3, 8, 7]) - np_y = np.array([1, 5, 3, 3]) - x = paddle.to_tensor(np_x) - y = paddle.to_tensor(np_y) + x = paddle.to_tensor([2, 3, 8, 7]) + y = paddle.to_tensor([1, 5, 3, 3]) z = paddle.floor_divide(x, y) print(z.numpy()) # [2, 0, 2, 2] @@ -530,14 +519,11 @@ def remainder(x, y, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - np_x = np.array([2, 3, 8, 7]) - np_y = np.array([1, 5, 3, 3]) - x = paddle.to_tensor(np_x) - y = paddle.to_tensor(np_y) + x = paddle.to_tensor([2, 3, 8, 7]) + y = paddle.to_tensor([1, 5, 3, 3]) z = paddle.remainder(x, y) print(z.numpy()) # [0, 3, 2, 1] @@ -612,20 +598,15 @@ def multiply(x, y, axis=-1, name=None): .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() - x_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - y_data = np.array([[5, 6], [7, 8]], dtype=np.float32) - x = paddle.to_tensor(x_data) - y = paddle.to_tensor(y_data) + x = paddle.to_tensor([[1, 2], [3, 4]]) + y = paddle.to_tensor([[5, 6], [7, 8]]) res = paddle.multiply(x, y) print(res.numpy()) # [[5, 12], [21, 32]] - x_data = np.array([[[1, 2, 3], [1, 2, 3]]], dtype=np.float32) - y_data = np.array([1, 2], dtype=np.float32) - x = paddle.to_tensor(x_data) - y = paddle.to_tensor(y_data) + x = paddle.to_tensor([[[1, 2, 3], [1, 2, 3]]]) + y = paddle.to_tensor([1, 2]) res = paddle.multiply(x, y, axis=1) print(res.numpy()) # [[[1, 2, 3], [2, 4, 6]]] @@ -654,36 +635,28 @@ Examples: paddle.disable_static() - x_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - y_data = np.array([[5, 6], [7, 8]], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([[1, 2], [3, 4]]) + y = paddle.to_tensor([[5, 6], [7, 8]]) res = paddle.maximum(x, y) print(res.numpy()) #[[5. 6.] # [7. 8.]] - x_data = np.array([[[1, 2, 3], [1, 2, 3]]], dtype=np.float32) - y_data = np.array([1, 2], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([[[1, 2, 3], [1, 2, 3]]]) + y = paddle.to_tensor([1, 2]) res = paddle.maximum(x, y, axis=1) print(res.numpy()) #[[[1. 2. 3.] # [2. 2. 3.]]] - x_data = np.array([2, 3, 5], dtype=np.float32) - y_data = np.array([1, 4, np.nan], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([2, 3, 5], dtype='float32') + y = paddle.to_tensor([1, 4, np.nan], dtype='float32') res = paddle.maximum(x, y) print(res.numpy()) #[ 2. 4. nan] - x_data = np.array([5, 3, np.inf], dtype=np.float32) - y_data = np.array([1, 4, 5], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([5, 3, np.inf], dtype='float32') + y = paddle.to_tensor([1, 4, 5], dtype='float32') res = paddle.maximum(x, y) print(res.numpy()) #[ 5. 4. inf] @@ -703,38 +676,31 @@ Examples: import paddle import numpy as np + paddle.disable_static() - x_data = np.array([[1, 2], [3, 4]], dtype=np.float32) - y_data = np.array([[5, 6], [7, 8]], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([[1, 2], [3, 4]], dtype='float32') + y = paddle.to_tensor([[5, 6], [7, 8]], dtype='float32') res = paddle.minimum(x, y) print(res.numpy()) #[[1. 2.] # [3. 4.]] - x_data = np.array([[[1, 2, 3], [1, 2, 3]]], dtype=np.float32) - y_data = np.array([1, 2], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([[[1, 2, 3], [1, 2, 3]]], dtype='float32') + y = paddle.to_tensor([1, 2], dtype='float32') res = paddle.minimum(x, y, axis=1) print(res.numpy()) #[[[1. 1. 1.] # [2. 2. 2.]]] - x_data = np.array([2, 3, 5], dtype=np.float32) - y_data = np.array([1, 4, np.nan], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([2, 3, 5], dtype='float32') + y = paddle.to_tensor([1, 4, np.nan], dtype='float32') res = paddle.minimum(x, y) print(res.numpy()) #[ 1. 3. 
nan] - x_data = np.array([5, 3, np.inf], dtype=np.float32) - y_data = np.array([1, 4, 5], dtype=np.float32) - x = paddle.to_variable(x_data) - y = paddle.to_variable(y_data) + x = paddle.to_tensor([5, 3, np.inf], dtype='float32') + y = paddle.to_tensor([1, 4, 5], dtype='float32') res = paddle.minimum(x, y) print(res.numpy()) #[1. 3. 5.] @@ -794,33 +760,33 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): it's data type is the same as `x`. Raises: - ValueError: The :attr:`dtype` must be float64 or int64. + ValueError: If the data type of `x` is float64, :attr:`dtype` can not be float32 or int32. + ValueError: If the data type of `x` is int64, :attr:`dtype` can not be int32. TypeError: The type of :attr:`axis` must be int, list or tuple. Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - # x is a Tensor variable with following elements: + # x is a Tensor with following elements: # [[0.2, 0.3, 0.5, 0.9] # [0.1, 0.2, 0.6, 0.7]] # Each example is followed by the corresponding output tensor. - x_data = np.array([[0.2, 0.3, 0.5, 0.9],[0.1, 0.2, 0.6, 0.7]]).astype('float32') - x = paddle.to_variable(x_data) + x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + [0.1, 0.2, 0.6, 0.7]]) out1 = paddle.sum(x) # [3.5] out2 = paddle.sum(x, axis=0) # [0.3, 0.5, 1.1, 1.6] out3 = paddle.sum(x, axis=-1) # [1.9, 1.6] out4 = paddle.sum(x, axis=1, keepdim=True) # [[1.9], [1.6]] - # y is a Tensor variable with shape [2, 2, 2] and elements as below: + # y is a Tensor with shape [2, 2, 2] and elements as below: # [[[1, 2], [3, 4]], # [[5, 6], [7, 8]]] # Each example is followed by the corresponding output tensor. - y_data = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]]).astype('float32') - y = paddle.to_variable(y_data) + y = paddle.to_tensor([[[1, 2], [3, 4]], + [[5, 6], [7, 8]]]) out5 = paddle.sum(y, axis=[1, 2]) # [10, 26] out6 = paddle.sum(y, axis=[0, 1]) # [16, 20] """ @@ -850,10 +816,6 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): 'out_dtype': convert_np_dtype_to_dtype_(dtype) }) dtype_flag = True - else: - raise ValueError( - "The value of 'dtype' in sum op must be float64, int64, but received of {}". - format(dtype)) if in_dygraph_mode(): axis = axis if axis != None and axis != [] else [0] @@ -867,6 +829,17 @@ def sum(x, axis=None, dtype=None, keepdim=False, name=None): 'reduce_all', reduce_all_flag) check_variable_and_dtype( x, 'x', ['float32', 'float64', 'int32', 'int64'], 'sum') + + if dtype is not None: + check_dtype(dtype, 'dtype', ['float32', 'float64', 'int32', 'int64'], 'sum') + x_dtype = convert_dtype(x.dtype) + + if (x_dtype == "float64" and dtype in ["float32", "int32"]) or \ + (x_dtype == "int64" and dtype == "int32"): + raise ValueError("The input(x)'s dtype is {} but the attr(dtype) of sum is {}, " + "which may cause data type overflows. Please reset attr(dtype) of sum." + .format(x_dtype, dtype)) + check_type(axis, 'axis', (int, list, tuple, type(None)), 'sum') helper = LayerHelper('sum', **locals()) @@ -1121,9 +1094,9 @@ def addmm(input, x, y, beta=1.0, alpha=1.0, name=None): paddle.disable_static() - x = paddle.to_variable(data_x) - y = paddle.to_variable(data_y) - input = paddle.to_variable(data_input) + x = paddle.to_tensor(data_x) + y = paddle.to_tensor(data_y) + input = paddle.to_tensor(data_input) out = paddle.tensor.addmm( input=input, x=x, y=y, beta=0.5, alpha=5.0 ) @@ -1204,12 +1177,10 @@ def logsumexp(x, axis=None, keepdim=False, name=None): .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() - x = np.array([[-1.5, 0., 2.], [3., 1.2, -2.4]]) - x = paddle.to_tensor(x) + x = paddle.to_tensor([[-1.5, 0., 2.], [3., 1.2, -2.4]]) out1 = paddle.logsumexp(x) # [3.4691226] out2 = paddle.logsumexp(x, 1) # [2.15317821, 3.15684602] @@ -1260,12 +1231,10 @@ def inverse(x, name=None): Examples: .. code-block:: python - import numpy as np import paddle - - mat_np = np.array([[2, 0], [0, 2]]).astype("float32") paddle.disable_static() - mat = paddle.to_variable(mat_np) + + mat = paddle.to_tensor([[2, 0], [0, 2]], dtype='float32') inv = paddle.inverse(mat) print(inv) # [[0.5, 0], [0, 0.5]] @@ -1316,16 +1285,15 @@ def max(x, axis=None, keepdim=False, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() # data_x is a variable with shape [2, 4] # the axis is a int element - data_x = np.array([[0.2, 0.3, 0.5, 0.9], - [0.1, 0.2, 0.6, 0.7]]) - x = paddle.to_variable(data_x) + + x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + [0.1, 0.2, 0.6, 0.7]]) result1 = paddle.max(x) print(result1.numpy()) #[0.9] @@ -1342,9 +1310,9 @@ def max(x, axis=None, keepdim=False, name=None): # data_y is a variable with shape [2, 2, 2] # the axis is list - data_y = np.array([[[1.0, 2.0], [3.0, 4.0]], - [[5.0, 6.0], [7.0, 8.0]]]) - y = paddle.to_variable(data_y) + + y = paddle.to_tensor([[[1.0, 2.0], [3.0, 4.0]], + [[5.0, 6.0], [7.0, 8.0]]]) result5 = paddle.max(y, axis=[1, 2]) print(result5.numpy()) #[4. 8.] @@ -1411,16 +1379,14 @@ def min(x, axis=None, keepdim=False, name=None): Examples: .. code-block:: python - import numpy as np import paddle paddle.disable_static() - # data_x is a variable with shape [2, 4] + # x is a tensor with shape [2, 4] # the axis is a int element - data_x = np.array([[0.2, 0.3, 0.5, 0.9], - [0.1, 0.2, 0.6, 0.7]]) - x = paddle.to_variable(data_x) + x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + [0.1, 0.2, 0.6, 0.7]]) result1 = paddle.min(x) print(result1.numpy()) #[0.1] @@ -1435,11 +1401,10 @@ def min(x, axis=None, keepdim=False, name=None): #[[0.2] # [0.1]] - # data_y is a variable with shape [2, 2, 2] + # y is a variable with shape [2, 2, 2] # the axis is list - data_y = np.array([[[1.0, 2.0], [3.0, 4.0]], - [[5.0, 6.0], [7.0, 8.0]]]) - y = paddle.to_variable(data_y) + y = paddle.to_tensor([[[1.0, 2.0], [3.0, 4.0]], + [[5.0, 6.0], [7.0, 8.0]]]) result5 = paddle.min(y, axis=[1, 2]) print(result5.numpy()) #[1. 5.] @@ -1596,11 +1561,9 @@ def clip(x, min=None, max=None, name=None): .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() - x = np.array([[1.2,3.5], [4.5,6.4]]).astype('float32') - x1 = paddle.to_variable(x) + x1 = paddle.to_tensor([[1.2, 3.5], [4.5, 6.4]], 'float32') out1 = paddle.clip(x1, min=3.5, max=5.0) out2 = paddle.clip(x1, min=2.5) print(out1.numpy()) @@ -1611,11 +1574,8 @@ def clip(x, min=None, max=None, name=None): # [[4.5, 6.4] """ - np_dtype = np.float32 - if x.dtype == VarDesc.VarType.FP64: - np_dtype = np.float64 - fmin = float(np.finfo(np_dtype).min) - fmax = float(np.finfo(np_dtype).max) + fmin = float(np.finfo(np.float32).min) + fmax = float(np.finfo(np.float32).max) if in_dygraph_mode(): if isinstance(min, Variable): @@ -1656,7 +1616,7 @@ def clip(x, min=None, max=None, name=None): helper = LayerHelper('clip', **locals()) output = helper.create_variable_for_type_inference( - dtype=helper.input_dtype()) + dtype=helper.input_dtype('x')) helper.append_op( type='clip', inputs=inputs, outputs={'Out': [output]}, attrs=attrs) @@ -1704,9 +1664,9 @@ def trace(x, offset=0, axis1=0, axis2=1, name=None): paddle.disable_static() - case1 = paddle.to_variable(case1) - case2 = paddle.to_variable(case2) - case3 = paddle.to_variable(case3) + case1 = paddle.to_tensor(case1) + case2 = paddle.to_tensor(case2) + case3 = paddle.to_tensor(case3) data1 = paddle.trace(case1) # data1.shape = [1] data2 = paddle.trace(case2, offset=1, axis1=1, axis2=2) # data2.shape = [3] data3 = paddle.trace(case3, offset=-3, axis1=1, axis2=-1) # data2.shape = [3, 5] @@ -1897,10 +1857,8 @@ def isfinite(x, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) - x = paddle.to_tensor(x_np) + x = paddle.to_tensor([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) out = paddle.tensor.isfinite(x) print(out.numpy()) # [False True True False True False False] """ @@ -1928,10 +1886,8 @@ def isinf(x, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) - x = paddle.to_tensor(x_np) + x = paddle.to_tensor([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) out = paddle.tensor.isinf(x) print(out.numpy()) # [ True False False True False False False] """ @@ -1959,10 +1915,8 @@ def isnan(x, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - x_np = np.array([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) - x = paddle.to_tensor(x_np) + x = paddle.to_tensor([float('-inf'), -2, 3.6, float('inf'), 0, float('-nan'), float('nan')]) out = paddle.tensor.isnan(x) print(out.numpy()) # [False False False False False True True] """ @@ -2005,14 +1959,12 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None): .. 
code-block:: python import paddle - import numpy as np paddle.disable_static() # the axis is a int element - data_x = np.array([[0.2, 0.3, 0.5, 0.9], - [0.1, 0.2, 0.6, 0.7]]).astype(np.float32) - x = paddle.to_tensor(data_x) + x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + [0.1, 0.2, 0.6, 0.7]]) out1 = paddle.prod(x) print(out1.numpy()) # [0.0002268] @@ -2038,9 +1990,8 @@ def prod(x, axis=None, keepdim=False, dtype=None, name=None): # int64 # the axis is list - data_y = np.array([[[1.0, 2.0], [3.0, 4.0]], - [[5.0, 6.0], [7.0, 8.0]]]) - y = paddle.to_tensor(data_y) + y = paddle.to_tensor([[[1.0, 2.0], [3.0, 4.0]], + [[5.0, 6.0], [7.0, 8.0]]]) out6 = paddle.prod(y, [0, 1]) print(out6.numpy()) # [105. 384.] @@ -2073,12 +2024,10 @@ def sign(x, name=None): Examples: .. code-block:: python - import numpy as np import paddle - data = np.array([3.0, 0.0, -2.0, 1.7], dtype='float32') paddle.disable_static() - x = paddle.to_tensor(data) + x = paddle.to_tensor([3.0, 0.0, -2.0, 1.7], dtype='float32') out = paddle.sign(x=x) print(out) # [1.0, 0.0, -1.0, 1.0] """ @@ -2113,12 +2062,9 @@ def tanh(x, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() - - x_data = np.array([-0.4, -0.2, 0.1, 0.3]) - x = paddle.to_tensor(x_data) + x = paddle.to_tensor([-0.4, -0.2, 0.1, 0.3]) out = paddle.tanh(x) print(out.numpy()) # [-0.37994896 -0.19737532 0.09966799 0.29131261] diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 6b08599fad1dfc6b5d60c3798bba802a5ddefd02..b38a1d0f5b7e92b0eac907170aad76a2b5c69bc1 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -14,17 +14,12 @@ # TODO: define random functions -import numpy as np - from ..fluid import core -from ..fluid.framework import device_guard, in_dygraph_mode, _varbase_creator, Variable, convert_np_dtype_to_dtype_ -from ..fluid.layers.layer_function_generator import templatedoc +from ..fluid.framework import in_dygraph_mode, Variable, convert_np_dtype_to_dtype_ from ..fluid.layer_helper import LayerHelper -from ..fluid.data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype +from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtype, check_shape from ..fluid.layers import utils -from ..fluid.layers.tensor import fill_constant import paddle -import warnings from ..fluid.io import shuffle #DEFINE_ALIAS @@ -65,7 +60,6 @@ def bernoulli(x, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() @@ -94,26 +88,26 @@ def bernoulli(x, name=None): return out -def gaussian_random(shape, mean=0.0, std=1.0, dtype=None, name=None): +def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None): """ This OP returns a Tensor filled with random values sampled from a Gaussian distribution, with ``shape`` and ``dtype``. Args: - shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + shape (list|tuple|Tensor): The shape of the output Tensor. If ``shape`` is a list or tuple, the elements of it should be integers or Tensors (with the shape [1], and the data type int32 or int64). If ``shape`` is a Tensor, it should be a 1-D Tensor(with the data type int32 or int64). - mean(float|int, optional): Mean of the output tensor, default is 0.0. - std(float|int, optional): Standard deviation of the output tensor, default + mean (float|int, optional): Mean of the output tensor, default is 0.0. + std (float|int, optional): Standard deviation of the output tensor, default is 1.0. 
- seed(int, optional): ${seed_comment} - dtype(str|np.dtype, optional): The data type of the output Tensor. + seed (int, optional): Random seed of generator. + dtype (str|np.dtype, optional): The data type of the output Tensor. Supported data types: float32, float64. Default is None, use global default dtype (see ``get_default_dtype`` for details). - name(str, optional): The default value is None. Normally there is no + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -121,26 +115,26 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype=None, name=None): Tensor: A Tensor filled with random values sampled from a Gaussian distribution, with ``shape`` and ``dtype``. """ + op_type_for_check = 'gaussian/standard_normal/randn/normal' + seed = 0 + if dtype is None: dtype = paddle.framework.get_default_dtype() if dtype not in ['float32', 'float64']: raise TypeError( - "gaussian_random only supports [float32, float64], but the default dtype is %s" - % dtype) - + "{} only supports [float32, float64], but the default dtype is {}" + .format(op_type_for_check, dtype)) if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) - seed = 0 - op_type_for_check = 'gaussian_random/standard_normal/randn/normal' if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) return core.ops.gaussian_random('shape', shape, 'mean', float(mean), 'std', float(std), 'seed', seed, 'dtype', dtype) - check_type(shape, 'shape', (list, tuple, Variable), op_type_for_check) + check_shape(shape, op_type_for_check) check_dtype(dtype, 'dtype', ['float32', 'float64'], op_type_for_check) inputs = {} @@ -151,10 +145,10 @@ def gaussian_random(shape, mean=0.0, std=1.0, dtype=None, name=None): 'dtype': dtype, 'use_mkldnn': False } - utils._get_shape_tensor_inputs( + utils.get_shape_tensor_inputs( inputs=inputs, attrs=attrs, shape=shape, op_type=op_type_for_check) - helper = LayerHelper('gaussian_random', **locals()) + helper = LayerHelper('gaussian', **locals()) out = helper.create_variable_for_type_inference(dtype) helper.append_op( type='gaussian_random', @@ -172,12 +166,12 @@ def standard_normal(shape, dtype=None, name=None): and ``dtype``. Args: - shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + shape (list|tuple|Tensor): The shape of the output Tensor. If ``shape`` is a list or tuple, the elements of it should be integers or Tensors (with the shape [1], and the data type int32 or int64). If ``shape`` is a Tensor, it should be a 1-D Tensor(with the data type int32 or int64). - dtype(str|np.dtype, optional): The data type of the output Tensor. + dtype (str|np.dtype, optional): The data type of the output Tensor. Supported data types: float32, float64. Default is None, use global default dtype (see ``get_default_dtype`` for details). @@ -189,27 +183,22 @@ def standard_normal(shape, dtype=None, name=None): normal distribution with mean 0 and standard deviation 1, with ``shape`` and ``dtype``. - Raises: - TypeError: If ``shape`` is not list, tuple, Tensor. - TypeError: If ``dtype`` is not float32, float64. - Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() # example 1: attr shape is a list which doesn't contain Tensor. 
- result_1 = paddle.standard_normal(shape=[2, 3]) + out1 = paddle.standard_normal(shape=[2, 3]) # [[-2.923464 , 0.11934398, -0.51249987], # random # [ 0.39632758, 0.08177969, 0.2692008 ]] # random # example 2: attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.standard_normal(shape=[dim_1, dim_2, 2]) + dim1 = paddle.full([1], 2, "int64") + dim2 = paddle.full([1], 3, "int32") + out2 = paddle.standard_normal(shape=[dim1, dim2, 2]) # [[[-2.8852394 , -0.25898588], # random # [-0.47420555, 0.17683524], # random # [-0.7989969 , 0.00754541]], # random @@ -218,21 +207,14 @@ def standard_normal(shape, dtype=None, name=None): # [ 0.8086993 , 0.6868893 ]]] # random # example 3: attr shape is a Tensor, the data type must be int64 or int32. - var_shape = paddle.to_tensor(np.array([2, 3])) - result_3 = paddle.standard_normal(var_shape) + shape_tensor = paddle.to_tensor([2, 3]) + result_3 = paddle.standard_normal(shape_tensor) + # [[-2.878077 , 0.17099959, 0.05111201] # random # [-0.3761474, -1.044801 , 1.1870178 ]] # random """ - if dtype is None: - dtype = paddle.framework.get_default_dtype() - if dtype not in ['float32', 'float64']: - raise TypeError( - "standard_normal only supports [float32, float64], but the default dtype is %s" - % dtype) - - return gaussian_random( - shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name) + return gaussian(shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name) randn = standard_normal @@ -275,7 +257,6 @@ def normal(mean=0.0, std=1.0, shape=None, name=None): .. code-block:: python import paddle - import numpy as np paddle.disable_static() @@ -283,11 +264,11 @@ def normal(mean=0.0, std=1.0, shape=None, name=None): # [[ 0.17501129 0.32364586 1.561118 ] # random # [-1.7232178 1.1545963 -0.76156676]] # random - mean_tensor = paddle.to_tensor(np.array([1.0, 2.0, 3.0])) + mean_tensor = paddle.to_tensor([1.0, 2.0, 3.0]) out2 = paddle.normal(mean=mean_tensor) # [ 0.18644847 -1.19434458 3.93694787] # random - std_tensor = paddle.to_tensor(np.array([1.0, 2.0, 3.0])) + std_tensor = paddle.to_tensor([1.0, 2.0, 3.0]) out3 = paddle.normal(mean=mean_tensor, std=std_tensor) # [1.00780561 3.78457445 5.81058198] # random @@ -306,16 +287,7 @@ def normal(mean=0.0, std=1.0, shape=None, name=None): "If std is Tensor, it's data type only support float32, float64." ) if shape is not None: - if isinstance(shape, (list, tuple)): - for item in shape: - check_type(item, 'shape', (int), 'normal', - 'Elements of shape should be int.') - elif isinstance(shape, Variable): - check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'normal') - else: - assert TypeError( - 'If mean and std are all not Tensor, shape should be list, tuple, Tensor.' - ) + check_shape(shape, 'normal') if isinstance(mean, Variable): if isinstance(std, Variable): @@ -330,7 +302,7 @@ def normal(mean=0.0, std=1.0, shape=None, name=None): mean = float(mean) out = standard_normal(paddle.shape(std), std.dtype, name) else: - return gaussian_random(shape=shape, mean=mean, std=std, name=name) + return gaussian(shape=shape, mean=mean, std=std, name=name) out = out * std + mean if not in_dygraph_mode(): @@ -383,7 +355,6 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() @@ -405,8 +376,7 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): # example 3: # attr shape is a Tensor, the data type must be int64 or int32. - shape = np.array([2, 3]) - shape_tensor = paddle.to_tensor(shape) + shape_tensor = paddle.to_tensor([2, 3]) result_3 = paddle.tensor.random.uniform(shape_tensor) # if shape_tensor's value is [2, 3] # result_3 is: @@ -419,27 +389,27 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): dtype = paddle.framework.get_default_dtype() if dtype not in ['float32', 'float64']: raise TypeError( - "uniform only supports [float32, float64], but the default dtype is %s" - % dtype) + "uniform/rand only supports [float32, float64], but the default dtype is {}". + format(dtype)) if not isinstance(dtype, core.VarDesc.VarType): dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) return core.ops.uniform_random('shape', shape, 'min', float(min), 'max', float(max), 'seed', seed, 'dtype', dtype) - check_type(shape, 'shape', (list, tuple, Variable), 'uniform_random/rand') - check_dtype(dtype, 'dtype', ('float32', 'float64'), 'uniform_random/rand') + check_type(shape, 'shape', (list, tuple, Variable), 'uniform/rand') + check_dtype(dtype, 'dtype', ('float32', 'float64'), 'uniform/rand') inputs = dict() attrs = {'seed': seed, 'min': min, 'max': max, 'dtype': dtype} - utils._get_shape_tensor_inputs( - inputs=inputs, attrs=attrs, shape=shape, op_type='uniform_random/rand') + utils.get_shape_tensor_inputs( + inputs=inputs, attrs=attrs, shape=shape, op_type='uniform/rand') - helper = LayerHelper("uniform_random", **locals()) + helper = LayerHelper("uniform", **locals()) out = helper.create_variable_for_type_inference(dtype) helper.append_op( type="uniform_random", inputs=inputs, attrs=attrs, @@ -449,29 +419,26 @@ def uniform(shape, dtype=None, min=-1.0, max=1.0, seed=0, name=None): def randint(low=0, high=None, shape=[1], dtype=None, name=None): """ - :alias_main: paddle.randint - :alias: paddle.tensor.randint, paddle.tensor.random.randint - This OP returns a Tensor filled with random integers from a discrete uniform distribution in the range [``low``, ``high``), with ``shape`` and ``dtype``. If ``high`` is None (the default), the range is [0, ``low``). Args: - low(int): The lower bound on the range of random values to generate. + low (int): The lower bound on the range of random values to generate. The ``low`` is included in the range. If ``high`` is None, the range is [0, ``low``). Default is 0. - high(int, optional): The upper bound on the range of random values to + high (int, optional): The upper bound on the range of random values to generate, the ``high`` is excluded in the range. Default is None (see above for behavior if high = None). Default is None. - shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + shape (list|tuple|Tensor): The shape of the output Tensor. If ``shape`` is a list or tuple, the elements of it should be integers or Tensors (with the shape [1], and the data type int32 or int64). If ``shape`` is a Tensor, it should be a 1-D Tensor(with the data type int32 or int64). Default is [1]. - dtype(str|np.dtype, optional): The data type of the + dtype (str|np.dtype, optional): The data type of the output tensor. Supported data types: int32, int64. If ``dytpe`` is None, the data type is int64. Default is None. 
- name(str, optional): The default value is None. Normally there is no + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -479,48 +446,43 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): Tensor: A Tensor filled with random integers from a discrete uniform distribution in the range [``low``, ``high``), with ``shape`` and ``dtype``. - Raises: - TypeError: If ``shape`` is not list, tuple, Tensor. - TypeError: If ``dtype`` is not int32, int64. - ValueError: If ``high`` is not greater then ``low``; If ``high`` is - None, and ``low`` is not greater than 0. - Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() # example 1: # attr shape is a list which doesn't contain Tensor. - result_1 = paddle.randint(low=-5, high=5, shape=[3]) + out1 = paddle.randint(low=-5, high=5, shape=[3]) # [0, -3, 2] # random # example 2: # attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.randint(low=-5, high=5, shape=[dim_1, dim_2], dtype="int32") + dim1 = paddle.full([1], 2, "int64") + dim2 = paddle.full([1], 3, "int32") + out2 = paddle.randint(low=-5, high=5, shape=[dim1, dim2], dtype="int32") # [[0, -1, -3], # random # [4, -2, 0]] # random # example 3: # attr shape is a Tensor - var_shape = paddle.to_variable(np.array([3])) - result_3 = paddle.randint(low=-5, high=5, shape=var_shape) + + shape_tensor = paddle.to_tensor(3) + result_3 = paddle.randint(low=-5, high=5, shape=shape_tensor) + # [-2, 2, 3] # random # example 4: # data type is int32 - result_4 = paddle.randint(low=-5, high=5, shape=[3], dtype='int32') + out4 = paddle.randint(low=-5, high=5, shape=[3], dtype='int32') # [-5, 4, -4] # random # example 5: # Input only one parameter # low=0, high=10, shape=[1], dtype='int64' - result_5 = paddle.randint(10) + out5 = paddle.randint(10) # [7] # random """ @@ -537,11 +499,11 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): dtype = convert_np_dtype_to_dtype_(dtype) if in_dygraph_mode(): - shape = utils._convert_shape_to_list(shape) + shape = utils.convert_shape_to_list(shape) return core.ops.randint('shape', shape, 'low', low, 'high', high, 'seed', 0, 'dtype', dtype) - check_type(shape, 'shape', (list, tuple, Variable), 'randint') + check_shape(shape, 'randint') check_dtype(dtype, 'dtype', ['int32', 'int64'], 'randint') if low >= high: raise ValueError( @@ -550,7 +512,7 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): inputs = dict() attrs = {'low': low, 'high': high, 'seed': 0, 'dtype': dtype} - utils._get_shape_tensor_inputs( + utils.get_shape_tensor_inputs( inputs=inputs, attrs=attrs, shape=shape, op_type='randint') helper = LayerHelper("randint", **locals()) @@ -560,21 +522,17 @@ def randint(low=0, high=None, shape=[1], dtype=None, name=None): return out -@templatedoc() def randperm(n, dtype="int64", name=None): """ - :alias_main: paddle.randperm - :alias: paddle.tensor.randperm, paddle.tensor.random.randperm - This OP returns a 1-D Tensor filled with random permutation values from 0 to n-1, with ``dtype``. Args: - n(int): The upper bound (exclusive), and it should be greater than 0. - dtype(str|np.dtype, optional): The data type of + n (int): The upper bound (exclusive), and it should be greater than 0. + dtype (str|np.dtype, optional): The data type of the output Tensor. 
Supported data types: int32, int64, float32, float64. Default is int64. - name(str, optional): The default value is None. Normally there is no + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -582,10 +540,6 @@ def randperm(n, dtype="int64", name=None): Tensor: A 1-D Tensor filled with random permutation values from 0 to n-1, with ``dtype``. - Raises: - ValueError: If ``n`` is not greater than 0. - TypeError: If ``dtype`` is not int32, int64, float32, float64. - Examples: .. code-block:: python @@ -593,10 +547,10 @@ def randperm(n, dtype="int64", name=None): paddle.disable_static() - result_1 = paddle.randperm(5) + out1 = paddle.randperm(5) # [4, 1, 2, 3, 0] # random - result_2 = paddle.randperm(7, 'int32') + out2 = paddle.randperm(7, 'int32') # [1, 6, 2, 0, 4, 3, 5] # random """ @@ -622,32 +576,20 @@ def randperm(n, dtype="int64", name=None): def rand(shape, dtype=None, name=None): """ - :alias_main: paddle.rand - :alias: paddle.tensor.rand, paddle.tensor.random.rand - This OP returns a Tensor filled with random values sampled from a uniform distribution in the range [0, 1), with ``shape`` and ``dtype``. - Examples: - :: - - Input: - shape = [1, 2] - - Output: - result=[[0.8505902, 0.8397286]] - Args: - shape(list|tuple|Tensor): The shape of the output Tensor. If ``shape`` + shape (list|tuple|Tensor): The shape of the output Tensor. If ``shape`` is a list or tuple, the elements of it should be integers or Tensors (with the shape [1], and the data type int32 or int64). If ``shape`` is a Tensor, it should be a 1-D Tensor(with the data type int32 or int64). - dtype(str|np.dtype, optional): The data type of the output Tensor. + dtype (str|np.dtype, optional): The data type of the output Tensor. Supported data types: float32, float64. Default is None, use global default dtype (see ``get_default_dtype`` for details). - name(str, optional): The default value is None. Normally there is no + name (str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -655,26 +597,21 @@ def rand(shape, dtype=None, name=None): Tensor: A Tensor filled with random values sampled from a uniform distribution in the range [0, 1), with ``shape`` and ``dtype``. - Raises: - TypeError: If ``shape`` is not list, tuple, Tensor. - ValueError: If ``dtype`` is not float32, float64. - Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() # example 1: attr shape is a list which doesn't contain Tensor. - result_1 = paddle.rand(shape=[2, 3]) + out1 = paddle.rand(shape=[2, 3]) # [[0.451152 , 0.55825245, 0.403311 ], # random # [0.22550228, 0.22106001, 0.7877319 ]] # random # example 2: attr shape is a list which contains Tensor. - dim_1 = paddle.fill_constant([1], "int64", 2) - dim_2 = paddle.fill_constant([1], "int32", 3) - result_2 = paddle.rand(shape=[dim_1, dim_2, 2]) + dim1 = paddle.full([1], 2, "int64") + dim2 = paddle.full([1], 3, "int32") + out2 = paddle.rand(shape=[dim1, dim2, 2]) # [[[0.8879919 , 0.25788337], # random # [0.28826773, 0.9712097 ], # random # [0.26438272, 0.01796806]], # random @@ -683,19 +620,11 @@ def rand(shape, dtype=None, name=None): # [0.870881 , 0.2984597 ]]] # random # example 3: attr shape is a Tensor, the data type must be int64 or int32. 
- var_shape = paddle.to_variable(np.array([2, 3])) - result_3 = paddle.rand(var_shape) + shape_tensor = paddle.to_tensor([2, 3]) + result_3 = paddle.rand(shape_tensor) + # [[0.22920267, 0.841956 , 0.05981819], # random # [0.4836288 , 0.24573246, 0.7516129 ]] # random """ - if dtype is None: - dtype = paddle.framework.get_default_dtype() - if dtype not in ['float32', 'float64']: - raise TypeError( - "rand only supports [float32, float64], but the default dtype is %s" - % dtype) - - out = uniform(shape, dtype, min=0.0, max=1.0, name=name) - out.stop_gradient = True - return out + return uniform(shape, dtype, min=0.0, max=1.0, name=name) diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py index eede022e05ba61bc23da517e7af7cd2eb58f5416..ce03d0ef15f0f80f4e01cf57bc8cc449186c2560 100644 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -18,7 +18,6 @@ from ..fluid.data_feeder import check_variable_and_dtype, check_type, check_dtyp from ..fluid import core, layers # TODO: define searching & indexing functions of a tensor -from ..fluid.layers import argmin #DEFINE_ALIAS from ..fluid.layers import has_inf #DEFINE_ALIAS from ..fluid.layers import has_nan #DEFINE_ALIAS @@ -67,16 +66,15 @@ def argsort(x, axis=-1, descending=False, name=None): Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() - input_array = np.array([[[5,8,9,5], - [0,0,1,7], - [6,9,2,4]], - [[5,2,4,2], - [4,7,7,9], - [1,7,0,6]]]).astype(np.float32) - x = paddle.to_variable(input_array) + x = paddle.to_tensor([[[5,8,9,5], + [0,0,1,7], + [6,9,2,4]], + [[5,2,4,2], + [4,7,7,9], + [1,7,0,6]]], + dtype='float32') out1 = paddle.argsort(x=x, axis=-1) out2 = paddle.argsort(x=x, axis=0) out3 = paddle.argsort(x=x, axis=1) @@ -124,7 +122,7 @@ def argsort(x, axis=-1, descending=False, name=None): return ids -def argmax(x, axis=None, dtype=None, keepdim=False, name=None): +def argmax(x, axis=None, keepdim=False, dtype="int64", name=None): """ This OP computes the indices of the max elements of the input tensor's element along the provided axis. @@ -135,10 +133,10 @@ def argmax(x, axis=None, dtype=None, keepdim=False, name=None): axis(int, optional): Axis to compute indices along. The effective range is [-R, R), where R is x.ndim. when axis < 0, it works the same way as axis + R. Default is None, the input `x` will be into the flatten tensor, and selecting the min value index. - dtype(str): Data type of the output tensor which can - be int32, int64. The default value is None, and it will - return the int64 indices. keepdim(bool, optional): Keep the axis that selecting max. The defalut value is False. + dtype(str|np.dtype, optional): Data type of the output tensor which can + be int32, int64. The default value is 'int64', and it will + return the int64 indices. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -149,14 +147,12 @@ def argmax(x, axis=None, dtype=None, keepdim=False, name=None): Examples: .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - data = np.array([[5,8,9,5], - [0,0,1,7], - [6,9,2,4]]) - x = paddle.to_variable(data) + x = paddle.to_tensor([[5,8,9,5], + [0,0,1,7], + [6,9,2,4]]) out1 = paddle.argmax(x) print(out1.numpy()) # 2 out2 = paddle.argmax(x, axis=1) @@ -166,48 +162,45 @@ def argmax(x, axis=None, dtype=None, keepdim=False, name=None): print(out3.numpy()) # [2 3 1] """ + if axis is not None and not isinstance(axis, int): + raise TypeError( + "The type of 'axis' must be int or None in argmax, but received %s." % (type(axis))) + + if not (isinstance(dtype, str) or isinstance(dtype, np.dtype)): + raise TypeError( + "the type of 'dtype' in argmax must be str or np.dtype, but received {}". format(type(dtype))) + + var_dtype = convert_np_dtype_to_dtype_(dtype) + check_dtype(var_dtype, 'dtype', ['int32', 'int64'], 'argmax') flatten = False if axis is None: flatten = True axis = 0 if in_dygraph_mode(): - if dtype != None: - var_dtype = convert_np_dtype_to_dtype_(dtype) - out = core.ops.arg_max(x, 'axis', axis, 'dtype', var_dtype, - 'keepdim', keepdim, 'flatten', flatten) - else: - out = core.ops.arg_max(x, 'axis', axis, 'keepdim', keepdim, - 'flatten', flatten) + out = core.ops.arg_max(x, 'axis', axis, 'dtype', var_dtype, 'keepdims', keepdim, 'flatten', flatten) return out helper = LayerHelper("argmax", **locals()) check_variable_and_dtype( x, 'x', ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], 'paddle.argmax') - var_dtype = None attrs = {} - if dtype is not None: - if dtype not in ['int32', 'int64']: - raise ValueError( - "The value of 'dtype' in argmax op must be int32, int64, but received of {}". - format(dtype)) - var_dtype = convert_np_dtype_to_dtype_(dtype) - attrs["dtype"] = var_dtype - else: - var_dtype = VarDesc.VarType.INT64 - out = helper.create_variable_for_type_inference(var_dtype) attrs['keepdims'] = keepdim attrs['axis'] = axis attrs['flatten'] = flatten + attrs['dtype'] = var_dtype helper.append_op( type='arg_max', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) out.stop_gradient = True return out -def argmin(x, axis=None, dtype=None, keepdim=False, name=None): +def argmin(x, axis=None, keepdim=False, dtype="int64", name=None): """ This OP computes the indices of the min elements of the input tensor's element along the provided axis. @@ -218,10 +211,10 @@ def argmin(x, axis=None, dtype=None, keepdim=False, name=None): axis(int, optional): Axis to compute indices along. The effective range is [-R, R), where R is x.ndim. when axis < 0, it works the same way as axis + R. Default is None, the input `x` will be into the flatten tensor, and selecting the min value index. + keepdim(bool, optional): Keep the axis that selecting min. The default value is False. dtype(str): Data type of the output tensor which can - be int32, int64. The default value is None, and it will + be int32, int64. The default value is 'int64', and it will return the int64 indices. - keepdim(bool, optional): Keep the axis that selecting min. The defalut value is False. name(str, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. @@ -232,14 +225,12 @@ def argmin(x, axis=None, dtype=None, keepdim=False, name=None): Examples: ..
code-block:: python - import numpy as np import paddle paddle.disable_static() - data = np.array([[5,8,9,5], - [0,0,1,7], - [6,9,2,4]]) - x = paddle.to_variable(data) + x = paddle.to_tensor([[5,8,9,5], + [0,0,1,7], + [6,9,2,4]]) out1 = paddle.argmin(x) print(out1.numpy()) # 4 out2 = paddle.argmin(x, axis=1) @@ -249,41 +240,38 @@ def argmin(x, axis=None, dtype=None, keepdim=False, name=None): print(out3.numpy()) # [0 0 2] """ + if axis is not None and not isinstance(axis, int): + raise TypeError( + "The type of 'axis' must be int or None in argmin, but received %s." + % (type(axis))) + + if not (isinstance(dtype, str) or isinstance(dtype, np.dtype)): + raise TypeError( + "the type of 'dtype' in argmin must be str or np.dtype, but received {}". + format(type(dtype))) + + var_dtype = convert_np_dtype_to_dtype_(dtype) + check_dtype(var_dtype, 'dtype', ['int32', 'int64'], 'argmin') flatten = False if axis is None: flatten = True axis = 0 if in_dygraph_mode(): - if dtype != None: - var_dtype = convert_np_dtype_to_dtype_(dtype) - out = core.ops.arg_min(x, 'axis', axis, 'dtype', var_dtype, - 'keepdim', keepdim, 'flatten', flatten) - else: - out = core.ops.arg_min(x, 'axis', axis, 'keepdim', keepdim, - 'flatten', flatten) + out = core.ops.arg_min(x, 'axis', axis, 'dtype', var_dtype, 'keepdims', + keepdim, 'flatten', flatten) return out helper = LayerHelper("argmin", **locals()) check_variable_and_dtype( x, 'x', ['float32', 'float64', 'int16', 'int32', 'int64', 'uint8'], 'paddle.argmin') - var_dtype = None - attrs = {} - if dtype is not None: - if dtype not in ['int32', 'int64']: - raise ValueError( - "The value of 'dtype' in argmin op must be int32, int64, but received of {}". - format(dtype)) - var_dtype = convert_np_dtype_to_dtype_(dtype) - attrs["dtype"] = var_dtype - else: - var_dtype = VarDesc.VarType.INT64 - out = helper.create_variable_for_type_inference(var_dtype) + attrs = {} attrs['keepdims'] = keepdim attrs['axis'] = axis attrs['flatten'] = flatten + attrs['dtype'] = var_dtype helper.append_op( type='arg_min', inputs={'X': x}, outputs={'Out': [out]}, attrs=attrs) out.stop_gradient = True @@ -311,24 +299,16 @@ def index_select(x, index, axis=0, name=None): Returns: Tensor: A Tensor with same data type as ``x``. - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of float32, float64, int32 and int64. - TypeError: ``index`` must be a Tensor and the data type of ``index`` must be int32 or int64. - Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() # Now we are in imperative mode - data = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0]]) - data_index = np.array([0, 1, 1]).astype('int32') - - x = paddle.to_tensor(data) - index = paddle.to_tensor(data_index) + x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]]) + index = paddle.to_tensor([0, 1, 1], dtype='int32') out_z1 = paddle.index_select(x=x, index=index) #[[1. 2. 3. 4.] # [5. 6. 7. 8.] @@ -382,48 +362,44 @@ def nonzero(input, as_tuple=False): Examples: ..
code-block:: python import paddle - import paddle.fluid as fluid - import numpy as np - - data1 = np.array([[1.0, 0.0, 0.0], - [0.0, 2.0, 0.0], - [0.0, 0.0, 3.0]]) - data2 = np.array([0.0, 1.0, 0.0, 3.0]) - data3 = np.array([0.0, 0.0, 0.0]) - with fluid.dygraph.guard(): - x1 = fluid.dygraph.to_variable(data1) - x2 = fluid.dygraph.to_variable(data2) - x3 = fluid.dygraph.to_variable(data3) - out_z1 = paddle.nonzero(x1) - print(out_z1.numpy()) - #[[0 0] - # [1 1] - # [2 2]] - out_z1_tuple = paddle.nonzero(x1, as_tuple=True) - for out in out_z1_tuple: - print(out.numpy()) - #[[0] - # [1] - # [2]] - #[[0] - # [1] - # [2]] - out_z2 = paddle.nonzero(x2) - print(out_z2.numpy()) - #[[1] - # [3]] - out_z2_tuple = paddle.nonzero(x2, as_tuple=True) - for out in out_z2_tuple: - print(out.numpy()) - #[[1] - # [3]] - out_z3 = paddle.nonzero(x3) - print(out_z3.numpy()) - #[] - out_z3_tuple = paddle.nonzero(x3, as_tuple=True) - for out in out_z3_tuple: - print(out.numpy()) - #[] + + paddle.disable_static() + + x1 = paddle.to_tensor([[1.0, 0.0, 0.0], + [0.0, 2.0, 0.0], + [0.0, 0.0, 3.0]]) + x2 = paddle.to_tensor([0.0, 1.0, 0.0, 3.0]) + x3 = paddle.to_tensor([0.0, 0.0, 0.0]) + out_z1 = paddle.nonzero(x1) + print(out_z1.numpy()) + #[[0 0] + # [1 1] + # [2 2]] + out_z1_tuple = paddle.nonzero(x1, as_tuple=True) + for out in out_z1_tuple: + print(out.numpy()) + #[[0] + # [1] + # [2]] + #[[0] + # [1] + # [2]] + out_z2 = paddle.nonzero(x2) + print(out_z2.numpy()) + #[[1] + # [3]] + out_z2_tuple = paddle.nonzero(x2, as_tuple=True) + for out in out_z2_tuple: + print(out.numpy()) + #[[1] + # [3]] + out_z3 = paddle.nonzero(x3) + print(out_z3.numpy()) + #[] + out_z3_tuple = paddle.nonzero(x3, as_tuple=True) + for out in out_z3_tuple: + print(out.numpy()) + #[] """ list_out = [] shape = input.shape @@ -470,16 +446,15 @@ def sort(x, axis=-1, descending=False, name=None): Examples: .. code-block:: python import paddle - import numpy as np paddle.disable_static() - input_array = np.array([[[5,8,9,5], - [0,0,1,7], - [6,9,2,4]], - [[5,2,4,2], - [4,7,7,9], - [1,7,0,6]]]).astype(np.float32) - x = paddle.to_variable(input_array) + x = paddle.to_tensor([[[5,8,9,5], + [0,0,1,7], + [6,9,2,4]], + [[5,2,4,2], + [4,7,7,9], + [1,7,0,6]]], + dtype='float32') out1 = paddle.sort(x=x, axis=-1) out2 = paddle.sort(x=x, axis=0) out3 = paddle.sort(x=x, axis=1) @@ -555,16 +530,11 @@ def where(condition, x, y, name=None): .. code-block:: python import paddle - import numpy as np - import paddle.fluid as fluid - - x_i = np.array([0.9383, 0.1983, 3.2, 1.2]).astype("float32") - y_i = np.array([1.0, 1.0, 1.0, 1.0]).astype("float32") - with fluid.dygraph.guard(): - x = fluid.dygraph.to_variable(x_i) - y = fluid.dygraph.to_variable(y_i) - out = paddle.where(x>1, x, y) + paddle.disable_static() + x = paddle.to_tensor([0.9383, 0.1983, 3.2, 1.2]) + y = paddle.to_tensor([1.0, 1.0, 1.0, 1.0]) + out = paddle.where(x>1, x, y) print(out.numpy()) #out: [1.0, 1.0, 3.2, 1.2] @@ -641,50 +611,41 @@ def index_sample(x, index): .. 
code-block:: python import paddle - import paddle.fluid as fluid - import numpy as np - - data = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0]]).astype('float32') - - data_index = np.array([[0, 1, 2], - [1, 2, 3], - [0, 0, 0]]).astype('int32') - - target_data = np.array([[100, 200, 300, 400], - [500, 600, 700, 800], - [900, 1000, 1100, 1200]]).astype('int32') - - with fluid.dygraph.guard(): - x = fluid.dygraph.to_variable(data) - index = fluid.dygraph.to_variable(data_index) - target = fluid.dygraph.to_variable(target_data) - - out_z1 = paddle.index_sample(x, index) - print(out_z1.numpy()) - #[[1. 2. 3.] - # [6. 7. 8.] - # [9. 9. 9.]] - - # Use the index of the maximum value by topk op - # get the value of the element of the corresponding index in other tensors - top_value, top_index = fluid.layers.topk(x, k=2) - out_z2 = paddle.index_sample(target, top_index) - print(top_value.numpy()) - #[[ 4. 3.] - # [ 8. 7.] - # [12. 11.]] - - print(top_index.numpy()) - #[[3 2] - # [3 2] - # [3 2]] - - print(out_z2.numpy()) - #[[ 400 300] - # [ 800 700] - # [1200 1100]] + + paddle.disable_static() + x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]], dtype='float32') + index = paddle.to_tensor([[0, 1, 2], + [1, 2, 3], + [0, 0, 0]], dtype='int32') + target = paddle.to_tensor([[100, 200, 300, 400], + [500, 600, 700, 800], + [900, 1000, 1100, 1200]], dtype='int32') + out_z1 = paddle.index_sample(x, index) + print(out_z1.numpy()) + #[[1. 2. 3.] + # [6. 7. 8.] + # [9. 9. 9.]] + + # Use the index of the maximum value by topk op + # get the value of the element of the corresponding index in other tensors + top_value, top_index = paddle.topk(x, k=2) + out_z2 = paddle.index_sample(target, top_index) + print(top_value.numpy()) + #[[ 4. 3.] + # [ 8. 7.] + # [12. 11.]] + + print(top_index.numpy()) + #[[3 2] + # [3 2] + # [3 2]] + + print(out_z2.numpy()) + #[[ 400 300] + # [ 800 700] + # [1200 1100]] """ @@ -717,27 +678,20 @@ def masked_select(x, mask, name=None): Returns: A 1-D Tensor which is the same data type as ``x``. - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of float32, float64, int32 and int64. - TypeError: ``mask`` must be a Tensor and the data type of ``mask`` must be bool. - Examples: .. code-block:: python import paddle - import numpy as np - + paddle.disable_static() - data = np.array([[1.0, 2.0, 3.0, 4.0], - [5.0, 6.0, 7.0, 8.0], - [9.0, 10.0, 11.0, 12.0]]).astype('float32') - - mask_data = np.array([[True, False, False, False], - [True, True, False, False], - [True, False, False, False]]).astype('bool') - x = paddle.to_tensor(data) - mask = paddle.to_tensor(mask_data) + + x = paddle.to_tensor([[1.0, 2.0, 3.0, 4.0], + [5.0, 6.0, 7.0, 8.0], + [9.0, 10.0, 11.0, 12.0]]) + mask = paddle.to_tensor([[True, False, False, False], + [True, True, False, False], + [True, False, False, False]]) out = paddle.masked_select(x, mask) #[1.0 5.0 6.0 9.0] """ @@ -782,20 +736,17 @@ def topk(x, k, axis=None, largest=True, sorted=True, name=None): .. 
code-block:: python - import numpy as np import paddle paddle.disable_static() - data_1 = np.array([1, 4, 5, 7]) - tensor_1 = paddle.to_tensor(data_1) + tensor_1 = paddle.to_tensor([1, 4, 5, 7]) value_1, indices_1 = paddle.topk(tensor_1, k=1) print(value_1.numpy()) # [7] print(indices_1.numpy()) # [3] - data_2 = np.array([[1, 4, 5, 7], [2, 6, 2, 5]]) - tensor_2 = paddle.to_tensor(data_2) + tensor_2 = paddle.to_tensor([[1, 4, 5, 7], [2, 6, 2, 5]]) value_2, indices_2 = paddle.topk(tensor_2, k=1) print(value_2.numpy()) # [[7] diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index 91676a6316b81a1998b9b48fb9ea7fcba6d67c25..d56dff5a81018e13e1c186f66172f868b0c4074b 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -237,10 +237,6 @@ def numel(x, name=None): Returns: Tensor: The number of elements for the input Tensor. - - Raises: - TypeError: ``x`` must be a Tensor and the data type of ``x`` must be one of bool, float16, float32, float64, int32, int64. - Examples: .. code-block:: python diff --git a/python/paddle/tests/test_model.py b/python/paddle/tests/test_model.py index 7b79b25cbc3e98b802bad87386ad0572ec6ab8d7..b7b5d44650f8d62926241a57feedfd5b932a37f5 100644 --- a/python/paddle/tests/test_model.py +++ b/python/paddle/tests/test_model.py @@ -416,6 +416,29 @@ class TestModelFunction(unittest.TestCase): shutil.rmtree(path) fluid.disable_dygraph() if dynamic else None + def test_dynamic_load(self): + mnist_data = MnistDataset(mode='train') + for new_optimizer in [True, False]: + path = tempfile.mkdtemp() + paddle.disable_static() + net = LeNet() + inputs = [InputSpec([None, 1, 28, 28], 'float32', 'x')] + labels = [InputSpec([None, 1], 'int64', 'label')] + if new_optimizer: + optim = paddle.optimizer.Adam( + learning_rate=0.001, parameters=net.parameters()) + else: + optim = fluid.optimizer.Adam( + learning_rate=0.001, parameter_list=net.parameters()) + model = Model(net, inputs, labels) + model.prepare( + optimizer=optim, loss=CrossEntropyLoss(reduction="sum")) + model.fit(mnist_data, batch_size=64, verbose=0) + model.save(path + '/test') + model.load(path + '/test') + shutil.rmtree(path) + paddle.enable_static() + def test_dynamic_save_static_load(self): path = tempfile.mkdtemp() # dynamic saving @@ -476,6 +499,30 @@ class TestModelFunction(unittest.TestCase): self.assertTrue(params[0].shape[1] == 10) fluid.disable_dygraph() if dynamic else None + def test_summary(self): + def _get_param_from_state_dict(state_dict): + params = 0 + for k, v in state_dict.items(): + params += np.prod(v.numpy().shape) + return params + + for dynamic in [True, False]: + device = paddle.set_device('cpu') + fluid.enable_dygraph(device) if dynamic else None + net = MyModel() + inputs = [InputSpec([None, 20], 'float32', 'x')] + model = Model(net, inputs) + model.prepare() + params_info = model.summary() + gt_params = _get_param_from_state_dict(net.state_dict()) + + np.testing.assert_allclose(params_info['total_params'], gt_params) + print(params_info) + + model.summary(input_size=(20)) + model.summary(input_size=[(20)]) + model.summary(input_size=(20), batch_size=2) + def test_export_deploy_model(self): for dynamic in [True, False]: fluid.enable_dygraph() if dynamic else None diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index f6299980b3e5c0bd0c7551b6b51c9b067d7960b5..2a649c776b4103b1d3d8648957bbff7a32007410 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -12,14 +12,14 @@ # See the License for the 
specific language governing permissions and # limitations under the License. -from .plot import Ploter from .profiler import ProfilerOptions from .profiler import Profiler from .profiler import get_profiler from .deprecated import deprecated + from . import download -__all__ = ['dump_config', 'Ploter', 'deprecated', 'download'] +__all__ = ['dump_config', 'deprecated', 'download'] #TODO: define new api under this directory # __all__ = ['unique_name', diff --git a/python/paddle/utils/deprecated.py b/python/paddle/utils/deprecated.py index 08fd7e33479b331454f63f05f6240dd221591ee9..d4e21748b55326468edb2ba1e46114e8d66c0046 100644 --- a/python/paddle/utils/deprecated.py +++ b/python/paddle/utils/deprecated.py @@ -45,7 +45,7 @@ def deprecated(update_to="", since="", reason=""): def decorator(func): # TODO(zhiqiu): We temporally disable the warnings for 2.0-bata, and it should be re-enabled in the future. - return func + # return func """construct warning message, and return a decorated function or class.""" assert isinstance(update_to, str), 'type of "update_to" must be str.' assert isinstance(since, str), 'type of "since" must be str.' @@ -56,9 +56,10 @@ def deprecated(update_to="", since="", reason=""): _reason = reason.strip() msg = 'API "{}.{}" is deprecated'.format(func.__module__, func.__name__) + if len(_since) > 0: msg += " since {}".format(_since) - msg += ", and may be removed in future versions." + msg += ", and will be removed in future versions." if len(_update_to) > 0: assert _update_to.startswith( "paddle." @@ -67,6 +68,11 @@ def deprecated(update_to="", since="", reason=""): msg += ' Please use "{}" instead.'.format(_update_to) if len(_reason) > 0: msg += "\n reason: {}".format(_reason) + if func.__doc__: + func.__doc__ = ('\n\nWarning: ' + msg + '\n') + func.__doc__ + # TODO(Joejiong) Early returning the wrapper function, currently we disable the warning wrapper, + # because the 2.0beta APIs are still under development, we will restore the warning functionality when 2.0 rc APIs become stable. + return func @functools.wraps(func) def wrapper(*args, **kwargs): @@ -75,6 +81,7 @@ def deprecated(update_to="", since="", reason=""): 2. since version is empty, in this case, API is deprecated in all versions. 3. current version is newer than since version. """ + msg = "\033[93mWarning %s \033[0m" % (msg) v_current = [int(i) for i in paddle.__version__.split(".")] v_current += [0] * (4 - len(v_current)) v_since = [int(i) for i in _since.split(".")] diff --git a/python/paddle/utils/plot.py b/python/paddle/utils/plot.py deleted file mode 100644 index ee651f2f0cd6f2e594a4e74c896baa924f70bbf5..0000000000000000000000000000000000000000 --- a/python/paddle/utils/plot.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import os -import six - - -class PlotData(object): - def __init__(self): - self.step = [] - self.value = [] - - def append(self, step, value): - self.step.append(step) - self.value.append(value) - - def reset(self): - self.step = [] - self.value = [] - - -class Ploter(object): - """ - Plot input data in a 2D graph - - Args: - title: assign the title of input data. - step: x_axis of the data. - value: y_axis of the data. - """ - - def __init__(self, *args): - self.__args__ = args - self.__plot_data__ = {} - for title in args: - self.__plot_data__[title] = PlotData() - # demo in notebooks will use Ploter to plot figure, but when we convert - # the ipydb to py file for testing, the import of matplotlib will make the - # script crash. So we can use `export DISABLE_PLOT=True` to disable import - # these libs - self.__disable_plot__ = os.environ.get("DISABLE_PLOT") - if not self.__plot_is_disabled__(): - import matplotlib.pyplot as plt - from IPython import display - self.plt = plt - self.display = display - - def __plot_is_disabled__(self): - return self.__disable_plot__ == "True" - - def append(self, title, step, value): - """ - Feed data - - Args: - title: assign the group data to this subtitle. - step: the x_axis of data. - value: the y_axis of data. - - Examples: - .. code-block:: python - plot_curve = Ploter("Curve 1","Curve 2") - plot_curve.append(title="Curve 1",step=1,value=1) - """ - assert isinstance(title, six.string_types) - assert title in self.__plot_data__ - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - data.append(step, value) - - def plot(self, path=None): - """ - Plot data in a 2D graph - - Args: - path: store the figure to this file path. Defaul None. - - Examples: - .. code-block:: python - plot_curve = Ploter() - plot_cure.plot() - """ - if self.__plot_is_disabled__(): - return - - titles = [] - for title in self.__args__: - data = self.__plot_data__[title] - assert isinstance(data, PlotData) - if len(data.step) > 0: - titles.append(title) - self.plt.plot(data.step, data.value) - self.plt.legend(titles, loc='upper left') - if path is None: - self.display.clear_output(wait=True) - self.display.display(self.plt.gcf()) - else: - self.plt.savefig(path) - self.plt.gcf().clear() - - def reset(self): - for key in self.__plot_data__: - data = self.__plot_data__[key] - assert isinstance(data, PlotData) - data.reset() diff --git a/python/requirements.txt b/python/requirements.txt index e278a1b824cc3829f1b67bc3a0cf643840990bb9..c8d3b2af1794bb0858b187d6a4c641322f50cdd1 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -21,4 +21,3 @@ objgraph astor pathlib netifaces -psutil diff --git a/tools/check_file_diff_approvals.sh b/tools/check_file_diff_approvals.sh index 2c575e4abf1beed039d3293821b8df356d4e9295..1e5179d0282d7f35c4232d9b9783cb831e83f462 100644 --- a/tools/check_file_diff_approvals.sh +++ b/tools/check_file_diff_approvals.sh @@ -19,8 +19,8 @@ API_FILES=("CMakeLists.txt" "paddle/fluid/framework/ir/node.h" "paddle/fluid/framework/ir/graph.h" "paddle/fluid/framework/framework.proto" - "python/paddle/distributed/__init" - "python/paddle/distributed/fleet/__init__.py" + "python/paddle/distributed/__init" + "python/paddle/distributed/fleet/__init__.py" "python/requirements.txt" "python/paddle/fluid/__init__.py" "python/paddle/fluid/compiler.py" @@ -39,6 +39,7 @@ API_FILES=("CMakeLists.txt" "python/paddle/fluid/tests/unittests/white_list/check_op_sequence_batch_1_input_white_list.py" 
"python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py" "tools/wlist.json" + "paddle/scripts/paddle_build.bat" ) approval_line=`curl -H "Authorization: token ${GITHUB_API_TOKEN}" https://api.github.com/repos/PaddlePaddle/Paddle/pulls/${GIT_PR_ID}/reviews?per_page=10000` @@ -114,17 +115,20 @@ for API_FILE in ${API_FILES[*]}; do echo_line="You must have one RD (luotao1 or phlrain) approval for ${API_FILE}, which manages the white list of batch size 1 input for sequence op test. For more information, please refer to [https://github.com/PaddlePaddle/Paddle/wiki/It-is-required-to-include-LoDTensor-input-with-batch_size=1-in-sequence-OP-test]. \n" check_approval 1 6836917 43953930 elif [ "${API_FILE}" == "python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py" ];then - echo_line="You must have one RD (Shixiaowei02 (Recommend), luotao1 or phlrain) approval for the python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py, which manages the white list of no_grad_set without value in operators. For more information, please refer to[https://github.com/PaddlePaddle/Paddle/wiki/It's-recommend-to-set-no_grad_set-to-be-None].\n" - check_approval 1 39303645 6836917 43953930 + echo_line="You must have one RD (Shixiaowei02 (Recommend), luotao1 or phlrain) approval for the python/paddle/fluid/tests/unittests/white_list/no_grad_set_white_list.py, which manages the white list of no_grad_set without value in operators. For more information, please refer to[https://github.com/PaddlePaddle/Paddle/wiki/It's-recommend-to-set-no_grad_set-to-be-None].\n" + check_approval 1 39303645 6836917 43953930 elif [ "${API_FILE}" == "tools/wlist.json" ];then - echo_line="You must have one TPM (jzhang533) approval for the api whitelist for the tools/wlist.json.\n" - check_approval 1 29231 + echo_line="You must have one TPM (jzhang533) approval for the api whitelist for the tools/wlist.json.\n" + check_approval 1 29231 elif [ "${API_FILE}" == "python/paddle/distributed/fleet/__init__.py" ]; then - echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " - check_approval 1 35550832 38231817 + echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " + check_approval 1 35550832 38231817 elif [ "${API_FILE}" == "python/paddle/distributed/__init__.py" ]; then - echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " - check_approval 1 35550832 38231817 + echo_line="You must have (guru4elephant,raindrops2sea) approval for ${API_FILE} changes " + check_approval 1 35550832 38231817 + elif [ "${API_FILE}" == "paddle/scripts/paddle_build.bat" ]; then + echo_line="You must have one RD (zhouwei25 (Recommend), luotao1) approval for ${API_FILE} changes, which manages all Paddle CI task on Windows.\n" + check_approval 1 52485244 6836917 else echo_line="You must have one RD (XiaoguangHu01,Xreki,luotao1) approval for ${API_FILE}, which manages the underlying code for fluid.\n" check_approval 1 3048612 46782768 12538138 6836917 @@ -159,7 +163,7 @@ fi HAS_UNITTEST_SKIP=`git diff -U0 upstream/$BRANCH | grep "^+[[:space:]]\{0,\}@unittest.skip" || true` if [ "${HAS_UNITTEST_SKIP}" != "" ] && [ "${GIT_PR_ID}" != "" ]; then - echo_line="Unittest is not allowed to be disabled.\nYou must have one RD (kolinwei(Recommend), liuwei1031, or luotao1) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n" + echo_line="Unittest is not allowed to be disabled.\nYou must have one RD 
(kolinwei(Recommend), or luotao1) approval for the usage of @unittest.skip or @unittest.skipIf.\n${HAS_UNITTEST_SKIP}\n" check_approval 1 22165420 6836917 46661762 fi diff --git a/tools/get_cpu_info.sh b/tools/get_cpu_info.sh new file mode 100755 index 0000000000000000000000000000000000000000..a1881f551da1ca022c186c50c667e51dff89f9be --- /dev/null +++ b/tools/get_cpu_info.sh @@ -0,0 +1,49 @@ +#!/bin/bash + +if [ "`uname -s`" != "Linux" ]; then + echo "Current scenario only support in Linux yet!" + exit 0 +fi + +echo "********** Hardware Information **********" +sockets=`grep 'physical id' /proc/cpuinfo | sort -u | wc -l` +cores_per_socket=`grep 'core id' /proc/cpuinfo | sort -u | wc -l` +ht=`lscpu |grep "per core" |awk -F':' '{print $2}'|xargs` +physical_cores=$((sockets * cores_per_socket)) +virtual_cores=`grep 'processor' /proc/cpuinfo | sort -u | wc -l` +numa_nodes=`lscpu |grep "NUMA node(s)"|awk -F':' '{print $2}'|xargs` +echo "CPU Name : `cat /proc/cpuinfo |grep -i "model name" |uniq |awk -F ':' '{print $2}'|xargs`" +echo "CPU Family : `lscpu |grep \"CPU family\" |awk -F':' '{print $2}'|xargs`" +echo "Socket Number : $sockets" +echo "Cores Per Socket : $cores_per_socket" +echo "Total Physical Cores : $physical_cores" +echo "Total Virtual Cores : $virtual_cores" +if [ $ht -eq 1 ]; then + echo "Hyper Threading : OFF" + if [ $physical_cores -ne $virtual_cores ]; then + echo "Error: HT logical error" + fi +else + echo "Hyper Threading : ON" + if [ $physical_cores -ge $virtual_cores ]; then + echo "Error: HT logical error" + fi +fi +echo "NUMA Nodes : $numa_nodes" +if [ $numa_nodes -lt $sockets ]; then + echo "Warning: NUMA node is not enough for the best performance,\ + at least $sockets" +fi + +echo "********** Software Information **********" +echo "OS Version : `cat /proc/version`" +echo "Kernel Release Version : `uname -r`" +echo "Kernel Patch Version : `uname -v`" +echo "GCC Version :`gcc --version | head -n 1|awk -F '\\\(GCC\\\)' '{print $2}'`" +if command -v cmake >/dev/null 2>&1; then + cmake_ver=`cmake --version | head -n 1 | awk -F 'version' '{print $2}'` +else + cmake_ver=" Not installed" +fi +echo "CMake Version :$cmake_ver" +echo "******************************************"
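The random.py changes in this patch reduce `rand` and `standard_normal`/`randn` to one-line wrappers over `uniform` and `gaussian`, which now own the default-dtype lookup and the float32/float64 check. Below is a minimal, self-contained sketch of that delegation pattern; it is plain NumPy/Python and not Paddle source, and `_DEFAULT_DTYPE` is only an illustrative stand-in for `paddle.framework.get_default_dtype()`.

.. code-block:: python

    import numpy as np

    _DEFAULT_DTYPE = 'float32'  # stand-in for paddle.framework.get_default_dtype()

    def gaussian(shape, mean=0.0, std=1.0, dtype=None, name=None):
        """Base sampler: the only place where dtype is defaulted and checked."""
        if dtype is None:
            dtype = _DEFAULT_DTYPE
        if dtype not in ('float32', 'float64'):
            raise TypeError(
                "only float32 and float64 are supported, but the default dtype "
                "is {}".format(dtype))
        return (np.random.randn(*shape) * std + mean).astype(dtype)

    def standard_normal(shape, dtype=None, name=None):
        # The wrapper collapses to a single return, as in the patch.
        return gaussian(shape=shape, mean=0.0, std=1.0, dtype=dtype, name=name)

    randn = standard_normal

    print(standard_normal([2, 3]).dtype)         # float32 (the default)
    print(randn([2, 3], dtype='float64').dtype)  # float64

Keeping the validation in one base sampler is what lets each wrapper shrink to a single `return` statement without duplicating the dtype error handling.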
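The search.py changes move `keepdim` ahead of `dtype` in `argmax`/`argmin`, give `dtype` a concrete default of "int64", and validate `axis` and `dtype` once up front instead of branching on `dtype is None` in both the dygraph and static paths. The sketch below only illustrates that validate-then-dispatch shape: `_check_index_dtype` is a made-up helper, and NumPy stands in for `core.ops.arg_max`/`arg_min`.

.. code-block:: python

    import numpy as np

    def _check_index_dtype(dtype, op_name):
        # Accept a str or np.dtype, normalise it, and restrict it to int32/int64.
        if not isinstance(dtype, (str, np.dtype)):
            raise TypeError(
                "the type of 'dtype' in {} must be str or np.dtype, but received {}"
                .format(op_name, type(dtype)))
        if str(np.dtype(dtype)) not in ('int32', 'int64'):
            raise TypeError(
                "'dtype' in {} must be int32 or int64, but received {}"
                .format(op_name, dtype))
        return np.dtype(dtype)

    def argmax(x, axis=None, keepdim=False, dtype="int64"):
        # Validation happens exactly once, before any dispatch.
        out_dtype = _check_index_dtype(dtype, 'argmax')
        flatten = axis is None
        if flatten:
            x, axis = np.ravel(x), 0
        out = np.argmax(x, axis=axis).astype(out_dtype)
        return np.expand_dims(out, axis) if keepdim and not flatten else out

    data = np.array([[5, 8, 9, 5], [0, 0, 1, 7], [6, 9, 2, 4]])
    print(argmax(data))                               # 2 (flattened index)
    print(argmax(data, axis=1, dtype='int32').dtype)  # int32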
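The deprecated.py change builds the deprecation message eagerly, prepends a `Warning:` block to the wrapped function's docstring, and then early-returns the original function, so no runtime warning is emitted during the 2.0-beta window. A simplified sketch of that behaviour follows; the asserts, the version comparison, and the disabled warning wrapper are omitted, and `paddle.new_api` is a hypothetical target used only for the demonstration.

.. code-block:: python

    def deprecated(update_to="", since="", reason=""):
        def decorator(func):
            msg = 'API "{}.{}" is deprecated'.format(func.__module__, func.__name__)
            if since:
                msg += " since {}".format(since)
            msg += ", and will be removed in future versions."
            if update_to:
                msg += ' Please use "{}" instead.'.format(update_to)
            if reason:
                msg += "\n reason: {}".format(reason)
            if func.__doc__:
                func.__doc__ = '\n\nWarning: ' + msg + '\n' + func.__doc__
            # Early return: the runtime-warning wrapper stays disabled for the beta.
            return func
        return decorator

    @deprecated(since="2.0", update_to="paddle.new_api")
    def old_api():
        """Original docs of old_api."""
        return 42

    print(old_api.__doc__)  # starts with the injected "Warning: ..." block
    print(old_api())        # behaviour unchanged: 42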