Commit 1a13420b authored by LiuChiaChi

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add-transformer-generate_square_subsequent_mask-api
@@ -30,8 +30,10 @@ __global__ void ComputeDifferent(T *centers_diff, const T *X, const T *centers,
   while (idy < K) {
     int64_t id = ids[idy];
-    PADDLE_ENFORCE(id >= 0, "received id:", id);
-    PADDLE_ENFORCE(id < N, "received id:", id);
+    PADDLE_ENFORCE(id >= 0, "Id should larger than 0 but received id: %d.", id);
+    PADDLE_ENFORCE(id < N, "Id should smaller than %d but received id: %d.", N,
+                   id);
     T *out = centers_diff + idy * D;
     const T *x = X + idy * D;
     const T *cent = centers + id * D;
@@ -52,8 +54,9 @@ __global__ void UpdateCenters(T *centers, T *centers_diff, const int64_t *ids,
   while (idy < K) {
     int count = 1;
     int64_t id = ids[idy];
-    PADDLE_ENFORCE(id >= 0, "received id:", id);
-    PADDLE_ENFORCE(id < N, "received id:", id);
+    PADDLE_ENFORCE(id >= 0, "Id should larger than 0 but received id: %d.", id);
+    PADDLE_ENFORCE(id < N, "Id should smaller than %d but received id: %d.", N,
+                   id);
     for (int i = 0; i < K; i++) {
       if (ids[i] == id) {
...
@@ -69,8 +69,10 @@ template <typename T>
 class CTCAlignOpCUDAKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    PADDLE_ENFORCE(platform::is_gpu_place(ctx.GetPlace()),
-                   "It must use CUDAPlace.");
+    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
+                      platform::errors::InvalidArgument(
+                          "CTCAlign operator CUDA kernel must use CUDAPlace "
+                          "rather than CPUPlace."));
     auto* input = ctx.Input<LoDTensor>("Input");
     auto* output = ctx.Output<LoDTensor>("Output");
     const int blank = ctx.Attr<int>("blank");
...
@@ -72,8 +72,11 @@ class CTCAlignKernel : public framework::OpKernel<T> {
     // check input dims and lod
     PADDLE_ENFORCE_EQ(
         input_dims[0], static_cast<int64_t>(input_lod[level].back()),
-        "The first dimension of Input(Input) should be equal to "
-        "the sum of all sequences' lengths.");
+        platform::errors::InvalidArgument(
+            "The first dimension %d of CTCAlign operator Input(Input) should "
+            "be equal to "
+            "the sum of all sequences' lengths %d.",
+            input_dims[0], static_cast<int64_t>(input_lod[level].back())));
     const size_t num_sequences = input_lod[level].size() - 1;
...
@@ -42,21 +42,21 @@ class MVOp : public framework::OperatorWithKernel {
     OP_INOUT_CHECK(context->HasOutput("Out"), "Output", "Out", "mv");

     auto dim_x = context->GetInputDim("X");
-    auto dim_y = context->GetInputDim("Vec");
+    auto dim_vec = context->GetInputDim("Vec");
     PADDLE_ENFORCE_EQ(
         dim_x.size(), 2,
         platform::errors::InvalidArgument(
             "The rank of input X should be 2, but is %d", dim_x.size()));
     PADDLE_ENFORCE_EQ(
-        dim_y.size(), 1,
+        dim_vec.size(), 1,
         platform::errors::InvalidArgument(
-            "The rank of input Vec should be 1, but is %d", dim_y.size()));
-    PADDLE_ENFORCE_EQ(dim_x[1] == dim_y[0], true,
+            "The rank of input Vec should be 1, but is %d", dim_vec.size()));
+    PADDLE_ENFORCE_EQ(dim_x[1], dim_vec[0],
                       platform::errors::InvalidArgument(
-                          "The length of input X' second dim should equal the "
-                          "length of input Vec,"
-                          " but X[%d, %d], Vec[%d]",
-                          dim_x[0], dim_x[1], dim_y[0]));
+                          "X's second dimension is expected to be equal to "
+                          "Vec's first dimension"
+                          "but recieved X'shape = [%s], Vec's shape = [%s]",
+                          dim_x, dim_vec));

     framework::DDim dim_out = framework::make_ddim({dim_x[0]});
...
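As a reference for the shape checks above, here is a minimal usage sketch; it assumes the operator is exposed as `paddle.mv` and is run in dygraph mode, and the shapes are illustrative only:

```python
import paddle

# X is [m, n] = [3, 4], Vec is [n] = [4]; the InferShape above requires
# dim_x[1] == dim_vec[0] and produces Out with shape [m] = [3].
x = paddle.rand([3, 4], dtype='float32')
vec = paddle.rand([4], dtype='float32')
out = paddle.mv(x, vec)
print(out.shape)  # [3]
```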
@@ -19,8 +19,8 @@ namespace paddle {
 namespace operators {

 template <typename T>
-__global__ void MVGradCUDAKernel(const int m, const int n, const T *dout,
-                                 const T *vec, T *dx) {
+__global__ void MVGradDxCUDAKernel(const int m, const int n, const T *dout,
+                                   const T *vec, T *dx) {
   int idx = blockDim.x * blockIdx.x + threadIdx.x;
   for (; idx < m * n; idx += blockDim.x * gridDim.x) {
     int i = idx / n;
@@ -52,32 +52,31 @@ class MVGradKernel<platform::CUDADeviceContext, T>
     int m = dim_x[0];
     int n = dim_x[1];

-    dx->Resize(framework::make_ddim({m * n}));
-
     // get data ptr
     const T *x_data = x->data<T>();
     const T *vec_data = vec->data<T>();
     const T *dout_data = dout->data<T>();
-    T *dx_data = dx->mutable_data<T>(context.GetPlace());
-    T *dvec_data = dvec->mutable_data<T>(context.GetPlace());

     auto &dev_ctx =
         context.template device_context<platform::CUDADeviceContext>();
     auto blas = math::GetBlas<platform::CUDADeviceContext, T>(dev_ctx);

-    // calculate dx
     auto stream = context.cuda_device_context().stream();
     auto config = GetGpuLaunchConfig1D(dev_ctx, m * n);
-    MVGradCUDAKernel<
-        T><<<config.block_per_grid.x, config.thread_per_block.x, 0, stream>>>(
-        m, n, dout_data, vec_data, dx_data);
-    dx->Resize(framework::make_ddim({m, n}));

-    // calculate dvec
-    blas.GEMV(true, dim_x[0], dim_x[1], static_cast<T>(1), x_data, dout_data,
-              static_cast<T>(0), dvec_data);
+    if (dx) {
+      T *dx_data = dx->mutable_data<T>(context.GetPlace());
+      MVGradDxCUDAKernel<
+          T><<<config.block_per_grid.x, config.thread_per_block.x, 0, stream>>>(
+          m, n, dout_data, vec_data, dx_data);
+    }
+
+    if (dvec) {
+      T *dvec_data = dvec->mutable_data<T>(context.GetPlace());
+      blas.GEMV(true, dim_x[0], dim_x[1], static_cast<T>(1), x_data, dout_data,
+                static_cast<T>(0), dvec_data);
+    }
   }
 };
...
@@ -74,30 +74,30 @@ class MVGradKernel : public framework::OpKernel<T> {
     int m = dim_x[0];
     int n = dim_x[1];

-    dx->Resize(framework::make_ddim({m * n}));
-
     // get data ptr
     const T *x_data = x->data<T>();
     const T *vec_data = vec->data<T>();
     const T *dout_data = dout->data<T>();
-    T *dx_data = dx->mutable_data<T>(context.GetPlace());
-    T *dvec_data = dvec->mutable_data<T>(context.GetPlace());

-    auto &dev_ctx = context.template device_context<DeviceContext>();
-    auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
+    if (dx) {
+      T *dx_data = dx->mutable_data<T>(context.GetPlace());

-    // calculate dx
-    for (int i = 0; i < m; ++i) {
-      for (int j = 0; j < n; ++j)
-        dx_data[i * n + j] = dout_data[i] * vec_data[j];
-    }
-    dx->Resize(framework::make_ddim({m, n}));
+      for (int i = 0; i < m; ++i) {
+        for (int j = 0; j < n; ++j) {
+          dx_data[i * n + j] = dout_data[i] * vec_data[j];
+        }
+      }
+    }

-    // calculate dvec
-    blas.GEMV(true, dim_x[0], dim_x[1], static_cast<T>(1), x_data, dout_data,
-              static_cast<T>(0), dvec_data);
+    if (dvec) {
+      T *dvec_data = dvec->mutable_data<T>(context.GetPlace());
+
+      auto &dev_ctx = context.template device_context<DeviceContext>();
+      auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
+
+      blas.GEMV(true, dim_x[0], dim_x[1], static_cast<T>(1), x_data, dout_data,
+                static_cast<T>(0), dvec_data);
+    }
   }
 };
...
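Both MV gradient kernels above (CUDA and CPU) implement the same two formulas; writing them out once may make the `if (dx)` / `if (dvec)` split easier to follow. Assuming the forward op computes Out = X·Vec with X of shape [m, n] and Vec of shape [n]:

$$
\frac{\partial L}{\partial X_{ij}} = \frac{\partial L}{\partial \mathrm{Out}_i}\,\mathrm{Vec}_j
\qquad\text{and}\qquad
\frac{\partial L}{\partial \mathrm{Vec}} = X^{\top}\,\frac{\partial L}{\partial \mathrm{Out}},
$$

i.e. the element-wise product written into `dx_data[i * n + j]` and the transposed GEMV that fills `dvec_data`. The new guards simply skip whichever gradient output was not requested.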
@@ -45,8 +45,10 @@ template <typename T>
 class PoolCUDNNOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-                      "It must use CUDAPlace.");
+    PADDLE_ENFORCE_EQ(
+        platform::is_gpu_place(ctx.GetPlace()), true,
+        platform::errors::InvalidArgument("Pool operator CUDA kernel must use "
+                                          "CUDAPlace rather than CPUPlace."));
     const Tensor *input = ctx.Input<Tensor>("X");
     Tensor *output = ctx.Output<Tensor>("Out");
@@ -175,8 +177,10 @@ template <typename T>
 class PoolCUDNNGradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    PADDLE_ENFORCE_EQ(platform::is_gpu_place(ctx.GetPlace()), true,
-                      "It must use CUDAPlace.");
+    PADDLE_ENFORCE_EQ(
+        platform::is_gpu_place(ctx.GetPlace()), true,
+        platform::errors::InvalidArgument("Pool operator CUDA kernel must use "
+                                          "CUDAPlace rather than CPUPlace."));
     const Tensor *input = ctx.Input<Tensor>("X");
     const Tensor *output = ctx.Input<Tensor>("Out");
...
@@ -38,18 +38,22 @@ int PoolOutputSize(int input_size, int filter_size, int padding_1,
   }
   PADDLE_ENFORCE_GT(
       output_size, 0,
-      "ShapeError: the output size must be greater than 0. But received: "
-      "output_size = %d due to the settings of input_size(%d), padding(%d,%d), "
-      "k_size(%d) and stride(%d). Please check again!",
-      output_size, input_size, padding_1, padding_2, filter_size, stride);
+      platform::errors::InvalidArgument(
+          "the output size must be greater than 0. But received: "
+          "output_size = %d due to the settings of input_size(%d), "
+          "padding(%d,%d), "
+          "k_size(%d) and stride(%d). Please check again!",
+          output_size, input_size, padding_1, padding_2, filter_size, stride));
   return output_size;
 }

 void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
-  PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
-                    "X(Input) of Pooling should not be null.");
-  PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true,
-                    "Out(Output) of Pooling should not be null.");
+  PADDLE_ENFORCE_EQ(
+      ctx->HasInput("X"), true,
+      platform::errors::NotFound("Input(X) of Pool operator is not found."));
+  PADDLE_ENFORCE_EQ(
+      ctx->HasOutput("Out"), true,
+      platform::errors::NotFound("Output(Out) of Pool operator is not found."));

   std::string pooling_type = ctx->Attrs().Get<std::string>("pooling_type");
   std::vector<int> ksize = ctx->Attrs().Get<std::vector<int>>("ksize");
@@ -65,28 +69,32 @@ void PoolOp::InferShape(framework::InferShapeContext* ctx) const {
   auto in_x_dims = ctx->GetInputDim("X");
   PADDLE_ENFORCE_EQ(
       in_x_dims.size() == 4 || in_x_dims.size() == 5, true,
-      "ShapeError: the input of Op(pool) should be 4-D or 5-D Tensor. But "
-      "received: %u-D Tensor and it's shape is [%s].",
-      in_x_dims.size(), in_x_dims);
+      platform::errors::InvalidArgument(
+          "the input of Op(pool) should be 4-D or 5-D Tensor. But "
+          "received: %u-D Tensor and it's shape is [%s].",
+          in_x_dims.size(), in_x_dims));

   PADDLE_ENFORCE_EQ(
       in_x_dims.size() - ksize.size(), 2U,
-      "ShapeError: the dimension of input minus the size of "
-      "Attr(ksize) must be euqal to 2 in Op(pool). "
-      "But received: the dimension of input minus the size "
-      "of Attr(ksize) is %d, the "
-      "input's dimension is %d, the shape of input "
-      "is [%s], the Attr(ksize)'s size is %d, the Attr(ksize) is [%s].",
-      in_x_dims.size() - ksize.size(), in_x_dims.size(), in_x_dims,
-      ksize.size(), framework::make_ddim(ksize));
+      platform::errors::InvalidArgument(
+          "the dimension of input minus the size of "
+          "Attr(ksize) must be euqal to 2 in Op(pool). "
+          "But received: the dimension of input minus the size "
+          "of Attr(ksize) is %d, the "
+          "input's dimension is %d, the shape of input "
+          "is [%s], the Attr(ksize)'s size is %d, the Attr(ksize) is [%s].",
+          in_x_dims.size() - ksize.size(), in_x_dims.size(), in_x_dims,
+          ksize.size(), framework::make_ddim(ksize)));

-  PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
-                    "ShapeError: the size of Attr(ksize) and Attr(strides) in "
-                    "Op(pool) must be equal. "
-                    "But received: Attr(ksize)'s size is %d, Attr(strides)'s "
-                    "size is %d, Attr(ksize) is [%s], Attr(strides)is [%s].",
-                    ksize.size(), strides.size(), framework::make_ddim(ksize),
-                    framework::make_ddim(strides));
+  PADDLE_ENFORCE_EQ(
+      ksize.size(), strides.size(),
+      platform::errors::InvalidArgument(
+          "the size of Attr(ksize) and Attr(strides) in "
+          "Op(pool) must be equal. "
+          "But received: Attr(ksize)'s size is %d, Attr(strides)'s "
+          "size is %d, Attr(ksize) is [%s], Attr(strides)is [%s].",
+          ksize.size(), strides.size(), framework::make_ddim(ksize),
+          framework::make_ddim(strides)));

   // MKL-DNN Kernels are using NCHW order of dims description
   // so we ignore data_format consideration for MKL-DNN kernel
@@ -182,9 +190,12 @@ framework::OpKernelType PoolOp::GetKernelTypeForVar(
 }

 void PoolOpGrad::InferShape(framework::InferShapeContext* ctx) const {
-  PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) must not be null.");
+  PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true,
+                    platform::errors::NotFound(
+                        "Input(X) of Pool Gradoperator is not found."));
   PADDLE_ENFORCE_EQ(ctx->HasOutput(framework::GradVarName("X")), true,
-                    "Input(X@GRAD) should not be null.");
+                    platform::errors::NotFound(
+                        "Input(X@GRAD) of Pool Gradoperator is not found."));
   ctx->SetOutputDim(framework::GradVarName("X"), ctx->GetInputDim("X"));
 }
@@ -210,7 +221,8 @@ framework::OpKernelType PoolOpGrad::GetExpectedKernelType(
   auto input_data_type = OperatorWithKernel::IndicateVarDataType(ctx, "X");
   if (input_data_type == framework::proto::VarType::FP16) {
     PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
-                      "float16 can only be used when CUDNN is used");
+                      platform::errors::InvalidArgument(
+                          "Float16 can only be used when CUDNN is used"));
   }
   return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
                                  library_);
...
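As a reference for the `PoolOutputSize` check above, the usual (non-ceil-mode) output-size arithmetic that produces the `output_size` being validated is

$$
\text{output\_size} \;=\; \left\lfloor\frac{\text{input\_size} + \text{padding}_1 + \text{padding}_2 - \text{k\_size}}{\text{stride}}\right\rfloor + 1,
$$

with ceil mode adding stride - 1 to the numerator before the division, so a non-positive result (and hence the new InvalidArgument error) only arises when the kernel is larger than the padded input or the stride setting is inconsistent.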
@@ -81,9 +81,11 @@ inline void UpdatePadding(std::vector<T>* paddings, const bool global_pooling,
       paddings->insert(paddings->begin() + 2 * i + 1, copy_pad);
     }
   } else {
-    PADDLE_ENFORCE_EQ(
-        data_dims.size() * 2, paddings->size(),
-        "Paddings size should be the same or twice as the pooling size.");
+    PADDLE_ENFORCE_EQ(data_dims.size() * 2, paddings->size(),
+                      platform::errors::InvalidArgument(
+                          "Paddings size %d should be the same or twice as the "
+                          "pooling size %d.",
+                          paddings->size(), data_dims.size() * 2));
   }

   // when padding_algorithm is "VALID" or "SAME"
@@ -200,7 +202,10 @@ class PoolKernel : public framework::OpKernel<T> {
                         pool_process, exclusive, adaptive, out);
         }
       } break;
-      default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Pool op only supports 2D and 3D input."));
+      }
     }
   }
 };
@@ -287,7 +292,10 @@ class PoolGradKernel : public framework::OpKernel<T> {
                           adaptive, in_x_grad);
         }
       } break;
-      default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Pool op only supports 2D and 3D input."));
+      }
     }
   }
 }
...
@@ -46,8 +46,11 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
     std::vector<int> paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
     bool adaptive = ctx->Attrs().Get<bool>("adaptive");

-    PADDLE_ENFORCE(in_x_dims.size() == 4 || in_x_dims.size() == 5,
-                   "Pooling intput should be 4-D or 5-D tensor.");
+    PADDLE_ENFORCE(
+        in_x_dims.size() == 4 || in_x_dims.size() == 5,
+        platform::errors::InvalidArgument("Pooling intput should be 4-D or 5-D "
+                                          "tensor but received %dD-Tensor",
+                                          in_x_dims.size()));

     if (ctx->Attrs().Get<bool>("global_pooling")) {
       ksize.resize(static_cast<size_t>(in_x_dims.size()) - 2);
@@ -57,16 +60,21 @@ class MaxPoolWithIndexOp : public framework::OperatorWithKernel {
       }
     }

-    PADDLE_ENFORCE_EQ(in_x_dims.size() - ksize.size(), 2U,
-                      platform::errors::InvalidArgument(
-                          "Input size and pooling size should be consistent."));
-    PADDLE_ENFORCE_EQ(ksize.size(), strides.size(),
-                      platform::errors::InvalidArgument(
-                          "Strides size and pooling size should be the same."));
+    PADDLE_ENFORCE_EQ(
+        in_x_dims.size() - ksize.size(), 2U,
+        platform::errors::InvalidArgument(
+            "The input size %d minus the kernel size %d should equal to 2.",
+            in_x_dims.size(), ksize.size()));
+    PADDLE_ENFORCE_EQ(
+        ksize.size(), strides.size(),
+        platform::errors::InvalidArgument(
+            "Strides size %d and pooling size %d should be the same.",
+            strides.size(), ksize.size()));
     PADDLE_ENFORCE_EQ(
         ksize.size(), paddings.size(),
         platform::errors::InvalidArgument(
-            "Paddings size and pooling size should be the same."));
+            "Paddings size %d and pooling size %d should be the same.",
+            paddings.size(), ksize.size()));

     std::vector<int64_t> output_shape({in_x_dims[0], in_x_dims[1]});
     if (adaptive) {
...
@@ -61,7 +61,10 @@ class MaxPoolWithIndexKernel : public framework::OpKernel<T1> {
         pool3d_forward(dev_ctx, *in_x, ksize, strides, paddings, adaptive, out,
                        mask);
       } break;
-      default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Pool op only supports 2D and 3D input."));
+      }
     }
   }
 };
@@ -106,7 +109,10 @@ class MaxPoolWithIndexGradKernel : public framework::OpKernel<T1> {
         pool3d_backward(device_ctx, *out_grad, *mask, ksize, strides,
                         paddings, adaptive, in_x_grad);
       } break;
-      default: { PADDLE_THROW("Pool op only supports 2D and 3D input."); }
+      default: {
+        PADDLE_THROW(platform::errors::InvalidArgument(
+            "Pool op only supports 2D and 3D input."));
+      }
     }
   }
 }
...
@@ -176,22 +176,31 @@ class GPUPSROIPoolOpKernel : public framework::OpKernel<T> {
     int height = in_dims[2];
     int width = in_dims[3];

-    PADDLE_ENFORCE_EQ(input_channels,
-                      output_channels * pooled_height * pooled_width,
-                      "the channels of input X should equal the product of "
-                      "output_channels x pooled_height x pooled_width");
+    PADDLE_ENFORCE_EQ(
+        input_channels, output_channels * pooled_height * pooled_width,
+        platform::errors::InvalidArgument(
+            "The channels %d of input X should equal the product of "
+            "output_channels %d x pooled_height %d x pooled_width %d.",
+            input_channels, output_channels, pooled_height, pooled_width));

     int rois_num = rois->dims()[0];
     if (rois_num == 0) return;

     auto rois_lod = rois->lod().back();
     int rois_batch_size = rois_lod.size() - 1;
-    PADDLE_ENFORCE_EQ(
-        rois_batch_size, batch_size,
-        "The rois_batch_size and input(X) batch_size must be the same.");
+    PADDLE_ENFORCE_EQ(rois_batch_size, batch_size,
+                      platform::errors::InvalidArgument(
+                          "The batch size of input(ROIs) and input(X) must be "
+                          "the same but received batch size of input(ROIs) and "
+                          "input(X) is %d and %d respectively.",
+                          rois_batch_size, batch_size));
     int rois_num_with_lod = rois_lod[rois_batch_size];
     PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                      "The rois_num from input and lod must be the same.");
+                      platform::errors::InvalidArgument(
+                          "The number of rois from input(ROIs) and its LOD "
+                          "must be the same. Received rois %d of input(ROIs) "
+                          "but the number of rois %d from its LOD is %d",
+                          rois_num, rois_num_with_lod));

     // set rois batch id
     framework::Tensor rois_batch_id_list;
...
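For context on the first check in this hunk: position-sensitive ROI pooling consumes one input-channel group per output cell, so the shapes have to satisfy

$$
C_{\text{in}} \;=\; C_{\text{out}} \times H_{\text{pooled}} \times W_{\text{pooled}},
$$

which is exactly the equality the new InvalidArgument message reports, together with the four offending values, when it fails.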
@@ -160,9 +160,14 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
     if (ctx.HasInput("RoisNum")) {
       auto* rois_num_t = ctx.Input<Tensor>("RoisNum");
       int rois_batch_size = rois_num_t->numel();
       PADDLE_ENFORCE_EQ(
           rois_batch_size, batch_size,
-          "The rois_batch_size and imgs batch_size must be the same.");
+          platform::errors::InvalidArgument(
+              "The batch size of input(ROIs) and input(X) must be the same but "
+              "received batch size of input(ROIs) and input(X) is %d and %d "
+              "respectively.",
+              rois_batch_size, batch_size));
       std::vector<int> rois_num_list(rois_batch_size);
       memory::Copy(cplace, rois_num_list.data(), gplace,
                    rois_num_t->data<int>(), sizeof(int) * rois_batch_size, 0);
@@ -178,10 +183,19 @@ class GPUROIPoolOpKernel : public framework::OpKernel<T> {
       int rois_batch_size = rois_lod.size() - 1;
       PADDLE_ENFORCE_EQ(
           rois_batch_size, batch_size,
-          "The rois_batch_size and imgs batch_size must be the same.");
+          platform::errors::InvalidArgument(
+              "The batch size of input(ROIs) and input(X) must be the same but "
+              "received batch size of input(ROIs) and input(X) is %d and %d "
+              "respectively.",
+              rois_batch_size, batch_size));
       int rois_num_with_lod = rois_lod[rois_batch_size];
       PADDLE_ENFORCE_EQ(rois_num, rois_num_with_lod,
-                        "The rois_num from input and lod must be the same.");
+                        platform::errors::InvalidArgument(
+                            "The number of rois from input(ROIs) and its LOD "
+                            "must be the same. Received rois %d of input(ROIs) "
+                            "but the number of rois %d from its LOD is %d",
+                            rois_num, rois_num_with_lod));
       for (int n = 0; n < rois_batch_size; ++n) {
         for (size_t i = rois_lod[n]; i < rois_lod[n + 1]; ++i) {
           roi_batch_id_data[i] = n;
...
@@ -30,6 +30,7 @@ __all__ = [
 ]

 fleet = Fleet()
+_final_strategy = fleet._final_strategy
 init = fleet.init
 is_first_worker = fleet.is_first_worker
 worker_index = fleet.worker_index
...
@@ -1244,8 +1244,7 @@ class DistributedStrategy(object):
             if getattr(self.strategy, f.name):
                 draws += border + "\n"
                 draws += h1_format.format(
-                    "{} = True, please check {}_configs".format(
-                        f.name, f.name))
+                    "{}=True <-> {}_configs".format(f.name, f.name))
                 draws += line + "\n"
                 my_configs = getattr(self.strategy,
                                      f.name + "_configs")
...
@@ -119,6 +119,8 @@ class Fleet(object):
         self.strategy_compiler = None
         self._is_collective = False
         self._runtime_handle = None
+        self._util = None
+        self._context = {}

     def init(self, role_maker=None, is_collective=False):
         """
@@ -233,7 +235,7 @@ class Fleet(object):
         Returns:
             int: worker numbers

         Examples:

             .. code-block:: python
@@ -569,8 +571,9 @@ class Fleet(object):
         if strategy == None:
             strategy = DistributedStrategy()
-        self.user_defined_strategy = strategy
-        self.valid_strategy = None
+        self._user_defined_strategy = copy.deepcopy(strategy)
+        self._context = {}
         return self

     @dygraph_only
@@ -909,6 +912,15 @@ class Fleet(object):
         # imitate target optimizer retrieval
         return self.user_defined_optimizer.clear_grad()

+    def _final_strategy(self):
+        if "valid_strategy" not in self._context:
+            print(
+                "WARNING: You may need to call minimize function before this function is called"
+            )
+            return {}
+        else:
+            return self._context["valid_strategy"]
+
     def minimize(self,
                  loss,
                  startup_program=None,
@@ -958,12 +970,15 @@ class Fleet(object):
             # for more examples, please reference https://github.com/PaddlePaddle/FleetX

         """
+        context = {}
+        context["user_defined_strategy"] = copy.deepcopy(
+            self._user_defined_strategy)
         if paddle.fluid.framework.in_dygraph_mode():
             # imitate target optimizer retrieval
             target_opt = self.user_defined_optimizer
+            self._context = context
             return target_opt.minimize(loss)

-        context = {}
         # cache original feed forward program
         self.origin_main_program = loss.block.program
         context["origin_main_program"] = self.origin_main_program
@@ -984,17 +999,19 @@ class Fleet(object):
             MetaOptimizerFactory()._get_valid_meta_optimizers(
                 self.user_defined_optimizer)

-        context["user_defined_strategy"] = copy.copy(self.user_defined_strategy)
+        context["user_defined_strategy"] = copy.deepcopy(
+            self._user_defined_strategy)
+        copy_user_defined_strategy = copy.deepcopy(self._user_defined_strategy)

         # trigger the auto-parallel in very strict condition
         # strategy = DistributedStrategy()
         # strategy.auto = True
         # optimizer = paddle.optimizer.SGD(learning_rate=0.1)
         # optimizer = fleet.distributed_optimizer(optimizer, strategy)
-        if self.user_defined_strategy._is_strict_auto():
+        if copy_user_defined_strategy._is_strict_auto():
             # turn on all the strategy for each optimizer
             for opt in distributed_optimizer_list:
-                opt._enable_strategy(self.user_defined_strategy, context)
+                opt._enable_strategy(copy_user_defined_strategy, context)

         valid_optimizer_list = []
         valid_graph_optimizer_list = []
@@ -1003,7 +1020,7 @@ class Fleet(object):
         for opt in distributed_optimizer_list:
             opt._set_basic_info(loss, self._role_maker,
                                 self.user_defined_optimizer,
-                                self.user_defined_strategy)
+                                copy_user_defined_strategy)
             if opt._can_apply() and not opt._is_graph_out():
                 valid_optimizer_list.append(opt)
             elif opt._can_apply() and opt._is_graph_out():
@@ -1014,13 +1031,15 @@ class Fleet(object):
         meta_optimizer, graph_optimizer = \
             self.strategy_compiler.generate_optimizer(
                 loss, self._role_maker, self.user_defined_optimizer,
-                self.user_defined_strategy, valid_optimizer_list,
+                copy_user_defined_strategy, valid_optimizer_list,
                 valid_graph_optimizer_list)

         valid_strategy = self.strategy_compiler._get_valid_strategy(
-            self.user_defined_strategy, can_not_apply_optimizer_list)
+            copy_user_defined_strategy, can_not_apply_optimizer_list)
+
+        context["valid_strategy"] = copy.deepcopy(valid_strategy)

-        context["valid_strategy"] = valid_strategy
+        self._context = context

         self.valid_strategy = valid_strategy
         self.valid_strategy._enable_env()
...
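The unit tests further down switch from `optimizer.user_defined_strategy` to this new accessor; the sketch below shows the intended call order, assuming a single-process collective run so that `fleet.init` needs no extra role-maker configuration:

```python
import paddle
import paddle.distributed.fleet as fleet

paddle.enable_static()
fleet.init(is_collective=True)

# Before minimize() has populated the internal context there is no
# "valid_strategy" entry, so _final_strategy() warns and returns {}.
print(fleet._final_strategy())  # {}

# After building a network and running
#     optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
#     optimizer.minimize(avg_cost)
# fleet._final_strategy() returns the DistributedStrategy that was actually
# applied, replacing the old optimizer.user_defined_strategy access.
```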
@@ -1291,17 +1291,17 @@ def append_backward(loss,
     It will be automatically invoked by the optimizer's `minimize` function.

     Parameters:
-        loss( :ref:`api_guide_Variable_en` ): The loss variable of the network.
-        parameter_list(list[Variable|str], optional): List of Parameters or Parameter.names
+        loss(Tensor): The loss Tensor of the network.
+        parameter_list(list[Tensor|str], optional): List of Parameters or Parameter.names
                                            that need to be updated by optimizers.
                                            If it is None, all parameters
                                            will be updated.
                                            Default: None.
-        no_grad_set(set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients
-                               should be ignored. All variables with
+        no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients
+                               should be ignored. All Tensors with
                                `stop_gradient=True` from all blocks will
                                be automatically added into this set.
-                               If this parameter is not None, the Variables or Variable.names in this set will be added to the default set.
+                               If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set.
                                Default: None.
         callbacks(list[callable object], optional): List of callback functions.
                                                     The callbacks are used for
@@ -1312,70 +1312,73 @@ def append_backward(loss,
                                                     new gradient operator is added
                                                     into the program. The callable
                                                     object must have two input
-                                                    parameters: 'block' and 'context'.
-                                                    The 'block' is the :ref:`api_guide_Block_en` which
+                                                    parameters: ``block`` and ``context`` .
+                                                    The ``block`` is the :ref:`api_guide_Block_en` which
                                                     the new gradient operator will
-                                                    be added to. The 'context' is a
+                                                    be added to. The ``context`` is a
                                                     map, whose keys are gradient
-                                                    variable names and values are
-                                                    corresponding original :ref:`api_guide_Variable_en` .
-                                                    In addition to this, the 'context'
+                                                    Tensor names and values are
+                                                    corresponding original :ref:`api_guide_tensor_en` .
+                                                    In addition to this, the ``context``
                                                     has another special key-value pair:
-                                                    the key is string '__current_op_desc__'
+                                                    the key is string ``__current_op_desc__``
                                                     and the value is the op_desc of the
                                                     gradient operator who has just
                                                     triggered the callable object.
                                                     Default: None.

     Returns:
-        list of tuple ( :ref:`api_guide_Variable_en` , :ref:`api_guide_Variable_en` ): Pairs of parameter and its corresponding gradients.
-        The key is the parameter and the value is gradient variable.
+        list of tuple ( :ref:`api_guide_tensor_en` , :ref:`api_guide_tensor_en` ): Pairs of parameter and its corresponding gradients.
+        The key is the parameter and the value is gradient Tensor.

     Raises:
-        AssertionError: If `loss` is not an instance of Variable.
+        AssertionError: If ``loss`` is not an instance of Tensor.

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
-
-            x = fluid.data(name='x', shape=[None, 13], dtype='int64')
-            y = fluid.data(name='y', shape=[None, 1], dtype='float32')
-            x_emb = fluid.embedding(x, size=[100, 256])
-            y_predict = fluid.layers.fc(input=x_emb, size=1, act=None, name='my_fc')
-            loss = fluid.layers.square_error_cost(input=y_predict, label=y)
-            avg_loss = fluid.layers.mean(loss)
+            import paddle
+            import paddle.nn.functional as F
+
+            paddle.enable_static()
+
+            x = paddle.static.data(name='x', shape=[None, 13], dtype='int64')
+            y = paddle.static.data(name='y', shape=[None, 1], dtype='float32')
+            x_emb = paddle.static.nn.embedding(x, size=[100, 256])
+            y_predict = paddle.static.nn.fc(input=x_emb, size=1, act=None, name='my_fc')
+            loss = F.square_error_cost(input=y_predict, label=y)
+            avg_loss = paddle.mean(loss)

             # Get all weights in main_program, not include bias.
-            all_weights = [param for param in fluid.default_main_program().block(0).all_parameters() if 'w_' in param.name]
+            all_weights = [param for param in paddle.static.default_main_program().block(0).all_parameters() if 'w_' in param.name]
             all_weights_name = [w.name for w in all_weights]

             # return all param_grads needed to be updated if parameter_list set default None.
-            p_g_list1 = fluid.backward.append_backward(loss=avg_loss)
+            p_g_list1 = paddle.static.append_backward(loss=avg_loss)
             # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD), (my_fc.b_0, my_fc.b_0@GRAD)]

-            # return the param_grads corresponding to parameter_list that can be list of param (Variable).
-            p_g_list2 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights)
+            # return the param_grads corresponding to parameter_list that can be list of param (Tensor).
+            p_g_list2 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights)
             # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)]

             # parameter_list can be list of param.name (str).
-            p_g_list3 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights_name)
+            p_g_list3 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights_name)
             # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)]

-            # no_grad_set can be set of Variables that means grad will be cut off from these Variables.
-            p_g_list4 = fluid.backward.append_backward(loss=avg_loss, no_grad_set=set([x_emb]))
+            # no_grad_set can be set of Tensors that means grad will be cut off from these Tensors.
+            p_g_list4 = paddle.static.append_backward(loss=avg_loss, no_grad_set=set([x_emb]))
             # output: [(my_fc.w_0, my_fc.w_0@GRAD), (my_fc.b_0, my_fc.b_0@GRAD)]

-            # no_grad_set can be set of Variable.name when the Variable is created inside layers and can't be specified explicitly.
-            p_g_list5 = fluid.backward.append_backward(loss=avg_loss, no_grad_set=set(['my_fc.b_0']))
+            # no_grad_set can be set of Tensor.name when the Tensor is created inside layers and can't be specified explicitly.
+            p_g_list5 = paddle.static.append_backward(loss=avg_loss, no_grad_set=set(['my_fc.b_0']))
             # output: [(embedding_0.w_0, embedding_0.w_0@GRAD), (my_fc.w_0, my_fc.w_0@GRAD)]

             # return [] because all param_grads are filtered by no_grad_set.
-            p_g_list6 = fluid.backward.append_backward(loss=avg_loss, parameter_list=all_weights, no_grad_set=set(all_weights))
+            p_g_list6 = paddle.static.append_backward(loss=avg_loss, parameter_list=all_weights, no_grad_set=set(all_weights))

     """
     check_type(loss, 'loss', framework.Variable,
-               'fluid.backward.append_backward')
+               'paddle.static.append_backward')

     if loss.op is None:
         # the loss is from a cloned program. Find loss op manually.
@@ -1387,7 +1390,7 @@ def append_backward(loss,

     if callbacks is not None:
         check_type(callbacks, 'callbacks', list,
-                   'fluid.backward.append_backward')
+                   'paddle.static.append_backward')

     program = loss.block.program
     root_block = program.block(0)
@@ -1727,21 +1730,21 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None):
     Backpropagate the gradients of targets to inputs.

     Args:
-        targets(Variable|list[Variable]): The target variables
-        inputs(Variable|list[Variable]): The input variables
-        target_gradients (Variable|list[Variable], optional): The gradient variables
+        targets(Tensor|list[Tensor]): The target Tensors
+        inputs(Tensor|list[Tensor]): The input Tensors
+        target_gradients (Tensor|list[Tensor], optional): The gradient Tensors
             of targets which has the same shape with targets, If None, ones will
             be created for them.
-        no_grad_set(set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients
-                               should be ignored. All variables with
+        no_grad_set(set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients
+                               should be ignored. All Tensors with
                                `stop_gradient=True` from all blocks will
                                be automatically added into this set.
-                               If this parameter is not None, the Variables or Variable.names in this set will be added to the default set.
+                               If this parameter is not None, the Tensors or Tensor.names in this set will be added to the default set.
                                Default: None.

     Return:
-        (list[Variable]): A list of gradients for inputs
-        If an input does not affect targets, the corresponding gradient variable
+        (list[Tensor]): A list of gradients for inputs
+        If an input does not affect targets, the corresponding gradient Tensor
         will be None
     """
     targets = _as_list(targets)
@@ -1865,41 +1868,42 @@ def gradients(targets, inputs, target_gradients=None, no_grad_set=None):
     Backpropagate the gradients of targets to inputs.

     Args:
-        targets (Variable|list[Variable]): The target variables.
-        inputs (Variable|list[Variable]): The input variables.
-        target_gradients (Variable|list[Variable], optional): The gradient variables
+        targets (Tensor|list[Tensor]): The target Tensors.
+        inputs (Tensor|list[Tensor]): The input Tensors.
+        target_gradients (Tensor|list[Tensor], optional): The gradient Tensor
             of targets which has the same shape with targets, If None, ones will
             be created for them.
-        no_grad_set (set[Variable|str], optional): Set of Variables or Variable.names in the :ref:`api_guide_Block_en` 0 whose gradients
-            should be ignored. All variables with `stop_gradient=True` from all blocks will
-            be automatically added into this set. If this parameter is not None, the Variables or Variable.names
+        no_grad_set (set[Tensor|str], optional): Set of Tensors or Tensor.names in the :ref:`api_guide_Block_en` 0 whose gradients
+            should be ignored. All Tensors with ``stop_gradient=True`` from all blocks will
+            be automatically added into this set. If this parameter is not None, the Tensors or Tensor.names
             in this set will be added to the default set. Default: None.

     Return:
-        (list[Variable]): A list of gradients for inputs
-        If an input does not affect targets, the corresponding gradient variable
+        (list[Tensor]): A list of gradients for inputs
+        If an input does not affect targets, the corresponding gradient Tensor
         will be None.

     Examples:
         .. code-block:: python

-            import paddle.fluid as fluid
+            import paddle
+            import paddle.nn.functional as F
+
+            paddle.enable_static()

-            x = fluid.data(name='x', shape=[None,2,8,8], dtype='float32')
+            x = paddle.static.data(name='x', shape=[None, 2, 8, 8], dtype='float32')
             x.stop_gradient=False
-            y = fluid.layers.conv2d(x, 4, 1, bias_attr=False)
-            y = fluid.layers.relu(y)
-            y = fluid.layers.conv2d(y, 4, 1, bias_attr=False)
-            y = fluid.layers.relu(y)
-            z = fluid.gradients([y], x)
-            print(z)
+            y = paddle.static.nn.conv2d(x, 4, 1, bias_attr=False)
+            y = F.relu(y)
+            z = paddle.static.gradients([y], x)
+            print(z) # [var x@GRAD : fluid.VarType.LOD_TENSOR.shape(-1L, 2L, 8L, 8L).astype(VarType.FP32)]
     """
     check_type(targets, 'targets', (framework.Variable, list),
-               'fluid.backward.gradients')
+               'paddle.static.gradients')
     check_type(inputs, 'inputs', (framework.Variable, list),
-               'fluid.backward.gradients')
+               'paddle.static.gradients')
     check_type(target_gradients, 'target_gradients', (
-        framework.Variable, list, type(None)), 'fluid.backward.gradients')
+        framework.Variable, list, type(None)), 'paddle.static.gradients')

     outs = calc_gradient(targets, inputs, target_gradients, no_grad_set)
     return _as_list(outs)
@@ -506,11 +506,12 @@ def name_scope(prefix=None):
     """
     :api_attr: Static Graph

-    Generate hierarchical name prefix for the operators.
+    Generate hierarchical name prefix for the operators in Static Graph.

     Note:
         This should only used for debugging and visualization purpose.
         Don't use it for serious analysis such as graph/program transformations.
+        Don't use it in dygraph, since it will cause memory leak.

     Args:
         prefix(str, optional): prefix. Default is none.
@@ -518,21 +519,22 @@ def name_scope(prefix=None):
     Examples:
         .. code-block:: python

-          import paddle.fluid as fluid
-          with fluid.name_scope("s1"):
-             a = fluid.data(name='data', shape=[None, 1], dtype='int32')
+          import paddle
+          paddle.enable_static()
+          with paddle.static.name_scope("s1"):
+             a = paddle.data(name='data', shape=[None, 1], dtype='int32')
              b = a + 1
-             with fluid.name_scope("s2"):
+             with paddle.static.name_scope("s2"):
                 c = b * 1
-             with fluid.name_scope("s3"):
+             with paddle.static.name_scope("s3"):
                 d = c / 1
-          with fluid.name_scope("s1"):
-              f = fluid.layers.pow(d, 2.0)
-          with fluid.name_scope("s4"):
+          with paddle.static.name_scope("s1"):
+              f = paddle.tensor.pow(d, 2.0)
+          with paddle.static.name_scope("s4"):
               g = f - 1

           # Op are created in the default main program.
-          for op in fluid.default_main_program().block(0).ops:
+          for op in paddle.static.default_main_program().block(0).ops:
              # elementwise_add is created in /s1/
              if op.type == 'elementwise_add':
                  assert op.desc.attr("op_namescope") == '/s1/'
@@ -5396,13 +5398,13 @@ def program_guard(main_program, startup_program=None):
     """
     :api_attr: Static Graph

-    Change the global main program and startup program with `"with"` statement.
-    Layer functions in the Python `"with"` block will append operators and
-    variables to the new main programs.
+    Change the global main program and startup program with ``with`` statement.
+    Layer functions in the Python ``with`` block will append operators and
+    Tensors to the new main programs.

     Args:
-        main_program(Program): New main program inside `"with"` statement.
-        startup_program(Program, optional): New startup program inside `"with"`
+        main_program(Program): New main program inside ``with`` statement.
+        startup_program(Program, optional): New startup program inside ``with``
             statement. :code:`None` means not changing startup program,
             default_startup_program is still used.
             Default: None.
@@ -5410,13 +5412,14 @@ def program_guard(main_program, startup_program=None):
     Examples:
        .. code-block:: python

-          import paddle.fluid as fluid
-
-          main_program = fluid.Program()
-          startup_program = fluid.Program()
-          with fluid.program_guard(main_program, startup_program):
-              data = fluid.data(name='image', shape=[None, 784, 784], dtype='float32')
-              hidden = fluid.layers.fc(input=data, size=10, act='relu')
+          import paddle
+
+          paddle.enable_static()
+          main_program = paddle.static.Program()
+          startup_program = paddle.static.Program()
+          with paddle.static.program_guard(main_program, startup_program):
+              data = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32')
+              hidden = paddle.static.nn.fc(input=data, size=10, act='relu')

     Notes: The temporary :code:`Program` can be used if the user does not need
     to construct either of startup program or main program.
@@ -5424,20 +5427,22 @@ def program_guard(main_program, startup_program=None):
     Examples:
        .. code-block:: python

-          import paddle.fluid as fluid
-
-          main_program = fluid.Program()
-          # does not care about startup program. Just pass a temporary value.
-          with fluid.program_guard(main_program, fluid.Program()):
-              data = fluid.data(name='image', shape=[None, 784, 784], dtype='float32')
+          import paddle
+
+          paddle.enable_static()
+          main_program = paddle.static.Program()
+          # does not care about startup program. Just pass a temporary value.
+          with paddle.static.program_guard(main_program, paddle.static.Program()):
+              data = paddle.static.data(name='image', shape=[None, 784, 784], dtype='float32')

     """
     from .data_feeder import check_type
-    check_type(main_program, 'main_program', Program, 'fluid.program_guard')
+    check_type(main_program, 'main_program', Program,
+               'paddle.static.program_guard')
     main_program = switch_main_program(main_program)
     if startup_program is not None:
         check_type(startup_program, 'startup_program', Program,
-                   'fluid.program_guard')
+                   'paddle.static.program_guard')
         startup_program = switch_startup_program(startup_program)
     try:
         yield
...
@@ -9287,8 +9287,8 @@ def pad2d(input,
     than height-1. And the width dimension has the same condition.

     Parameters:
-        input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format, which is a 4-D Tensor with data type float32.
-        paddings (Variable | List[int32]): The padding size. If padding is a List, it must
+        input (Tensor): The input image with [N, C, H, W] format or [N, H, W, C] format, which is a 4-D Tensor with data type float32.
+        paddings (Tensor | List[int32]): The padding size. If padding is a List, it must
             contain four integers, (padding_top, padding_bottom, padding_left, padding_right).
             Otherwise, it is a 1-D Tensor with shape [4]. Data type is int32.
             Default is [0, 0, 0, 0].
@@ -9304,10 +9304,7 @@ def pad2d(input,
         name (str, optional) : The default value is None. Normally there is no need for
                     user to set this property. For more information, please refer to :ref:`api_guide_Name` .

-    Returns: a 4-D Tensor padded according to paddings and mode and data type is same as input.
-
-    Return Type: Variable
+    Returns: Tensor, a 4-D Tensor padded according to paddings and mode and data type is same as input.

     Examples:
         .. code-block:: text
@@ -9340,9 +9337,33 @@ def pad2d(input,
     Code Examples:
         .. code-block:: python

-          import paddle.fluid as fluid
-          data = fluid.data(name='data', shape=[None, 3, 32, 32], dtype='float32')
-          result = fluid.layers.pad2d(input=data, paddings=[0, 1, 2, 3], mode='reflect')
+            import numpy as np
+            import paddle
+            import paddle.nn.functional as F
+
+            # example 1
+            x_shape = (1, 1, 3, 4)
+            x = np.arange(np.prod(x_shape), dtype=np.float32).reshape(x_shape) + 1
+            tensor_x = paddle.to_tensor(x)
+            y = F.pad2d(tensor_x, paddings=[1, 2, 2, 1], pad_value=1, mode='constant')
+            print(y.numpy())
+            # [[[[ 1.  1.  1.  1.  1.  1.  1.]
+            #    [ 1.  1.  1.  2.  3.  4.  1.]
+            #    [ 1.  1.  5.  6.  7.  8.  1.]
+            #    [ 1.  1.  9. 10. 11. 12.  1.]
+            #    [ 1.  1.  1.  1.  1.  1.  1.]
+            #    [ 1.  1.  1.  1.  1.  1.  1.]]]]
+
+            # example 2
+            x_shape = (1, 1, 2, 3)
+            x = np.arange(np.prod(x_shape), dtype=np.float32).reshape(x_shape) + 1
+            tensor_x = paddle.to_tensor(x)
+            y = F.pad2d(tensor_x, paddings=[1, 1, 1, 1], mode='reflect')
+            print(y.numpy())
+            # [[[[5. 4. 5. 6. 5.]
+            #    [2. 1. 2. 3. 2.]
+            #    [5. 4. 5. 6. 5.]
+            #    [2. 1. 2. 3. 2.]]]]

     """
     check_variable_and_dtype(
         input, 'input', ['float16', 'float32', 'float64', 'int32', 'int64'],
...
@@ -394,7 +394,8 @@ foreach(TEST_OP ${TEST_OPS})
     py_test_modules(${TEST_OP} MODULES ${TEST_OP})
 endforeach(TEST_OP)
 py_test_modules(test_adam_op_multi_thread MODULES test_adam_op ENVS FLAGS_inner_op_parallelism=4)
-py_test_modules(test_warpctc_op MODULES test_warpctc_op)
+# disable test_warpctc_op
+# py_test_modules(test_warpctc_op MODULES test_warpctc_op)
 py_test_modules(test_bilinear_interp_op MODULES test_bilinear_interp_op ENVS ${GC_ENVS})
 py_test_modules(test_nearest_interp_op MODULES test_nearest_interp_op ENVS ${GC_ENVS})
 py_test_modules(test_imperative_resnet MODULES test_imperative_resnet ENVS
...
@@ -60,8 +60,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)

-        self.assertTrue(optimizer.user_defined_strategy.a_sync)
-        a_sync_configs = optimizer.user_defined_strategy.a_sync_configs
+        self.assertTrue(fleet._final_strategy().a_sync)
+        a_sync_configs = fleet._final_strategy().a_sync_configs
         self.assertTrue(a_sync_configs['k_steps'] == 0)
...
@@ -72,8 +72,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
         optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
         optimizer.minimize(avg_cost)

-        self.assertTrue(optimizer.user_defined_strategy.a_sync)
-        a_sync_configs = optimizer.user_defined_strategy.a_sync_configs
+        self.assertTrue(fleet._final_strategy().a_sync)
+        a_sync_configs = fleet._final_strategy().a_sync_configs
         self.assertTrue(a_sync_configs['k_steps'] == 0)
...
...@@ -60,8 +60,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase): ...@@ -60,8 +60,8 @@ class TestFleetGradientMergeMetaOptimizer(unittest.TestCase):
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
self.assertTrue(optimizer.user_defined_strategy.a_sync) self.assertTrue(fleet._final_strategy().a_sync)
a_sync_configs = optimizer.user_defined_strategy.a_sync_configs a_sync_configs = fleet._final_strategy().a_sync_configs
self.assertTrue(a_sync_configs['k_steps'] == 800) self.assertTrue(a_sync_configs['k_steps'] == 800)
......
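The assertions above read the a_sync settings back from the strategy that fleet ends up using. As a rough illustration of where those values come from, here is a minimal sketch assuming only the public DistributedStrategy interface; the strategy is built locally, with no distributed launch or role maker involved.

.. code-block:: python

    import paddle.distributed.fleet as fleet

    # configure asynchronous training with a k_steps threshold
    strategy = fleet.DistributedStrategy()
    strategy.a_sync = True
    strategy.a_sync_configs = {"k_steps": 800}

    print(strategy.a_sync)                     # True
    print(strategy.a_sync_configs["k_steps"])  # 800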
...@@ -18,6 +18,8 @@ import unittest ...@@ -18,6 +18,8 @@ import unittest
import paddle import paddle
import os import os
paddle.enable_static()
class TestFleetAMPOptimizer(unittest.TestCase): class TestFleetAMPOptimizer(unittest.TestCase):
def setUp(self): def setUp(self):
...@@ -55,6 +57,8 @@ class TestFleetAMPOptimizer(unittest.TestCase): ...@@ -55,6 +57,8 @@ class TestFleetAMPOptimizer(unittest.TestCase):
optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy) optimizer = fleet.distributed_optimizer(optimizer, strategy=strategy)
optimizer.minimize(avg_cost) optimizer.minimize(avg_cost)
strategy = fleet._final_strategy()
ops = [op.type for op in avg_cost.block.ops] ops = [op.type for op in avg_cost.block.ops]
self.assertIn('cast', ops) self.assertIn('cast', ops)
self.assertIn('check_finite_and_unscale', ops) self.assertIn('check_finite_and_unscale', ops)
......
...@@ -18,6 +18,8 @@ import os ...@@ -18,6 +18,8 @@ import os
import paddle.distributed.fleet as fleet import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet.base.role_maker as role_maker
paddle.enable_static()
class TestDistributedStrategyAuto(unittest.TestCase): class TestDistributedStrategyAuto(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -167,6 +167,8 @@ class TestFleetDygraph(unittest.TestCase): ...@@ -167,6 +167,8 @@ class TestFleetDygraph(unittest.TestCase):
state_dict = adam.state_dict() state_dict = adam.state_dict()
adam.set_state_dict(state_dict) adam.set_state_dict(state_dict)
final_strategy = fleet._final_strategy()
class TestFleetBaseSingleRunCollective(unittest.TestCase): class TestFleetBaseSingleRunCollective(unittest.TestCase):
def setUp(self): def setUp(self):
......
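The dygraph test above round-trips the optimizer state through state_dict/set_state_dict before asking fleet for the final strategy. A minimal, fleet-free sketch of that round trip, assuming a plain paddle.optimizer.Adam in dynamic graph mode; the Linear layer and its sizes are placeholders, not taken from the test.

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 10)
    adam = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=linear.parameters())

    # serialize the optimizer state and load it straight back
    state_dict = adam.state_dict()
    adam.set_state_dict(state_dict)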
...@@ -19,6 +19,8 @@ import os ...@@ -19,6 +19,8 @@ import os
import paddle.distributed.fleet as fleet import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet.base.role_maker as role_maker
paddle.enable_static()
class TestFleetLambMetaOptimizer(unittest.TestCase): class TestFleetLambMetaOptimizer(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -19,6 +19,8 @@ import os ...@@ -19,6 +19,8 @@ import os
import paddle.distributed.fleet as fleet import paddle.distributed.fleet as fleet
import paddle.distributed.fleet.base.role_maker as role_maker import paddle.distributed.fleet.base.role_maker as role_maker
paddle.enable_static()
class TestFleetLarsMetaOptimizer(unittest.TestCase): class TestFleetLarsMetaOptimizer(unittest.TestCase):
def setUp(self): def setUp(self):
......
...@@ -20,6 +20,7 @@ import paddle ...@@ -20,6 +20,7 @@ import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.fluid.core as core import paddle.fluid.core as core
from paddle.static import program_guard, Program
from op_test import OpTest from op_test import OpTest
...@@ -37,7 +38,7 @@ class TestMVOp(OpTest): ...@@ -37,7 +38,7 @@ class TestMVOp(OpTest):
self.check_grad(['X', 'Vec'], 'Out') self.check_grad(['X', 'Vec'], 'Out')
def init_config(self): def init_config(self):
self.x = np.random.random((5, 100)).astype("float64") self.x = np.random.random((2, 100)).astype("float64")
self.vec = np.random.random((100)).astype("float64") self.vec = np.random.random((100)).astype("float64")
...@@ -57,21 +58,36 @@ class TestMVAPI(unittest.TestCase): ...@@ -57,21 +58,36 @@ class TestMVAPI(unittest.TestCase):
paddle.enable_static() paddle.enable_static()
def test_static_graph(self): def test_static_graph(self):
paddle.enable_static() for x_stop_gradient in [False, True]:
for vec_stop_gradient in [False, True]:
paddle.enable_static()
train_program = Program()
startup_program = Program()
self.input_x = np.random.rand(5, 100).astype("float64")
self.input_vec = np.random.rand(100).astype("float64")
with program_guard(train_program, startup_program):
data_x = paddle.static.data(
"x", shape=[5, 100], dtype="float64")
data_vec = paddle.static.data(
"vec", shape=[100], dtype="float64")
data_x.stop_gradient = x_stop_gradient
data_vec.stop_gradient = vec_stop_gradient
result_vec = paddle.mv(data_x, data_vec)
self.input_x = np.random.rand(5, 100).astype("float64") self.place = paddle.CPUPlace()
self.input_vec = np.random.rand(100).astype("float64") exe = paddle.static.Executor(self.place)
res, = exe.run(
data_x = paddle.static.data("x", shape=[5, 100], dtype="float64") feed={"x": self.input_x,
data_vec = paddle.static.data("vec", shape=[100], dtype="float64") "vec": self.input_vec},
result_vec = paddle.mv(data_x, data_vec) fetch_list=[result_vec])
self.place = paddle.CPUPlace() z_expected = np.array(np.dot(self.input_x, self.input_vec))
exe = paddle.static.Executor(self.place) self.assertTrue(np.allclose(res, z_expected))
res, = exe.run(feed={"x": self.input_x,
"vec": self.input_vec},
fetch_list=[result_vec])
z_expected = np.array(np.dot(self.input_x, self.input_vec))
self.assertTrue(np.allclose(res, z_expected))
class TestMVError(unittest.TestCase): class TestMVError(unittest.TestCase):
......
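The static-graph test above compares paddle.mv against np.dot through an executor. The same check can be written much more compactly in dynamic graph mode; a minimal sketch, assuming the default dygraph mode and the same random shapes the test uses.

.. code-block:: python

    import numpy as np
    import paddle

    x_np = np.random.rand(5, 100).astype("float64")
    vec_np = np.random.rand(100).astype("float64")

    # matrix-vector product: (5, 100) x (100,) -> (5,)
    out = paddle.mv(paddle.to_tensor(x_np), paddle.to_tensor(vec_np))
    assert np.allclose(out.numpy(), np.dot(x_np, vec_np))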
...@@ -30,7 +30,6 @@ from ...fluid.layers import nn, utils ...@@ -30,7 +30,6 @@ from ...fluid.layers import nn, utils
from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.data_feeder import check_variable_and_dtype
from ...fluid.param_attr import ParamAttr from ...fluid.param_attr import ParamAttr
from ...fluid.layer_helper import LayerHelper from ...fluid.layer_helper import LayerHelper
from .common import pad2d
def _is_list_or_tuple(input): def _is_list_or_tuple(input):
......
...@@ -14,24 +14,20 @@ ...@@ -14,24 +14,20 @@
__all__ = [ __all__ = [
'Adadelta', 'AdadeltaOptimizer', 'Adagrad', 'AdagradOptimizer', 'Adam', 'Adadelta', 'AdadeltaOptimizer', 'Adagrad', 'AdagradOptimizer', 'Adam',
'Adamax', 'AdamW', 'DecayedAdagrad', 'DecayedAdagradOptimizer', 'Adamax', 'AdamW', 'DecayedAdagrad', 'DecayedAdagradOptimizer', 'Dpsgd',
'DGCMomentumOptimizer', 'Dpsgd', 'DpsgdOptimizer', 'DpsgdOptimizer', 'ExponentialMovingAverage', 'Ftrl', 'FtrlOptimizer',
'ExponentialMovingAverage', 'Ftrl', 'FtrlOptimizer', 'LambOptimizer', 'LookaheadOptimizer', 'ModelAverage', 'Momentum', 'MomentumOptimizer',
'LarsMomentum', 'LarsMomentumOptimizer', 'LookaheadOptimizer', 'RMSProp', 'SGD', 'SGDOptimizer', 'Optimizer', '_LRScheduler', 'NoamLR',
'ModelAverage', 'Momentum', 'MomentumOptimizer', 'PipelineOptimizer', 'PiecewiseLR', 'NaturalExpLR', 'InverseTimeLR', 'PolynomialLR',
'RecomputeOptimizer', 'RMSProp', 'SGD', 'SGDOptimizer', 'Optimizer', 'LinearLrWarmup', 'ExponentialLR', 'MultiStepLR', 'StepLR', 'LambdaLR',
'_LRScheduler', 'NoamLR', 'PiecewiseLR', 'NaturalExpLR', 'InverseTimeLR', 'ReduceLROnPlateau', 'CosineAnnealingLR'
'PolynomialLR', 'LinearLrWarmup', 'ExponentialLR', 'MultiStepLR', 'StepLR',
'LambdaLR', 'ReduceLROnPlateau', 'CosineAnnealingLR'
] ]
from ..fluid.optimizer import Momentum, Adagrad, Dpsgd, DecayedAdagrad, Ftrl,\ from ..fluid.optimizer import Momentum, Adagrad, Dpsgd, DecayedAdagrad, Ftrl,\
AdagradOptimizer,DpsgdOptimizer,\ AdagradOptimizer, DpsgdOptimizer, DecayedAdagradOptimizer, \
DecayedAdagradOptimizer,FtrlOptimizer,AdadeltaOptimizer, \ FtrlOptimizer, AdadeltaOptimizer, ModelAverage, \
ModelAverage, LarsMomentum, DGCMomentumOptimizer, LambOptimizer,\ ExponentialMovingAverage, LookaheadOptimizer
ExponentialMovingAverage, PipelineOptimizer, LookaheadOptimizer, \
RecomputeOptimizer, LarsMomentumOptimizer
from .optimizer import Optimizer from .optimizer import Optimizer
from .adam import Adam from .adam import Adam
......
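After the __all__ cleanup above, the trimmed export list is what users reach through paddle.optimizer. A minimal usage sketch of one remaining export, Adam, assuming dynamic graph mode; the layer and batch sizes are placeholders chosen for illustration.

.. code-block:: python

    import paddle

    linear = paddle.nn.Linear(10, 1)
    adam = paddle.optimizer.Adam(
        learning_rate=0.001, parameters=linear.parameters())

    # one training step on random data
    x = paddle.rand([4, 10])
    loss = paddle.mean(linear(x))
    loss.backward()
    adam.step()
    adam.clear_grad()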