Unverified commit b6996598, authored by zhouweiwei2014, committed by GitHub

[cherry-pick2.5] [Zero-Dim] Support all/any/min/max/prod/logsumexp/amax/amin/some loss output 0D (#53192)
Parent: f84ac449
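
For context, a minimal sketch of the user-visible behavior this cherry-pick targets (assuming a Paddle build that includes #53192; before this change the shapes below were [1]):

```python
import paddle

# Full reductions now produce 0-D tensors (shape []) instead of shape [1].
x = paddle.rand([3, 5])
print(paddle.max(x).shape)        # []
print(paddle.prod(x).shape)       # []
print(paddle.logsumexp(x).shape)  # []

# Mean-reduced losses follow the same convention.
label = paddle.rand([3, 5])
loss = paddle.nn.functional.l1_loss(x, label)  # default reduction='mean'
print(loss.shape)                 # []
print(float(loss))                # a 0-D tensor converts cleanly to a Python float
```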
...@@ -86,10 +86,10 @@ template <typename T> ...@@ -86,10 +86,10 @@ template <typename T>
nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape, nvinfer1::Dims Vec2TRT_Dims(const std::vector<T>& shape,
std::string input, std::string input,
bool with_dynamic_shape = false) { bool with_dynamic_shape = false) {
PADDLE_ENFORCE_GT(shape.size(), PADDLE_ENFORCE_GE(shape.size(),
0UL, 0UL,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"TensorRT's tensor input requires at least 1 " "TensorRT's tensor input requires at least 0 "
"dimensions, but input %s has %d dims.", "dimensions, but input %s has %d dims.",
input, input,
shape.size())); shape.size()));
......
...@@ -58,8 +58,8 @@ class AssertOp : public framework::OperatorBase { ...@@ -58,8 +58,8 @@ class AssertOp : public framework::OperatorBase {
"Input(Condition) of AssertOp is not found.")); "Input(Condition) of AssertOp is not found."));
const phi::DenseTensor &cond = cond_var_ptr->Get<phi::DenseTensor>(); const phi::DenseTensor &cond = cond_var_ptr->Get<phi::DenseTensor>();
PADDLE_ENFORCE_EQ( PADDLE_ENFORCE_EQ(
cond.dims(), cond.numel(),
phi::make_ddim({1}), 1,
platform::errors::InvalidArgument( platform::errors::InvalidArgument(
"The numel of Input(Condition) of AssertOp must be 1. But now " "The numel of Input(Condition) of AssertOp must be 1. But now "
"the Condition's shape is %s.", "the Condition's shape is %s.",
......
...@@ -98,10 +98,9 @@ class __reduce_meanMaker__ : public ops::ReduceBaseOpMaker { ...@@ -98,10 +98,9 @@ class __reduce_meanMaker__ : public ops::ReduceBaseOpMaker {
virtual std::string GetOpType() const { return "Reduce reduce_mean"; } virtual std::string GetOpType() const { return "Reduce reduce_mean"; }
}; };
DECLARE_INFER_SHAPE_FUNCTOR( DECLARE_INFER_SHAPE_FUNCTOR(reduce_mean,
reduce_mean, ReduceMeanInferShapeFunctor,
ReduceMeanInferShapeFunctor, PD_INFER_META(phi::OriginReduceInferMetaBase));
PD_INFER_META(phi::ReduceIntArrayAxisInferMetaBase));
REGISTER_OPERATOR(reduce_mean, REGISTER_OPERATOR(reduce_mean,
ops::ReduceBaseOp, ops::ReduceBaseOp,
......
...@@ -1132,7 +1132,7 @@ void prod_grad(const Tensor& x, ...@@ -1132,7 +1132,7 @@ void prod_grad(const Tensor& x,
if (!keep_dim) { if (!keep_dim) {
auto axis_ = std::vector<int64_t>(); auto axis_ = std::vector<int64_t>();
if (reduce_all) { if (reduce_all) {
for (int64_t i = 1; i < x_dim_size; i++) { for (int64_t i = 0; i < x_dim_size; i++) {
axis_.push_back(i); axis_.push_back(i);
} }
} else { } else {
...@@ -1187,7 +1187,7 @@ void max_grad(const Tensor& x, ...@@ -1187,7 +1187,7 @@ void max_grad(const Tensor& x,
} else { } else {
auto axis_ = std::vector<int64_t>(); auto axis_ = std::vector<int64_t>();
if (reduce_all) { if (reduce_all) {
for (int64_t i = 1; i < x_dim_size; i++) { for (int64_t i = 0; i < x_dim_size; i++) {
axis_.push_back(i); axis_.push_back(i);
} }
} else { } else {
......
...@@ -820,7 +820,7 @@ ...@@ -820,7 +820,7 @@
args : (Tensor x, IntArray axis={}, bool keepdim=false) args : (Tensor x, IntArray axis={}, bool keepdim=false)
output : Tensor(out) output : Tensor(out)
infer_meta : infer_meta :
func : ReduceIntArrayAxisInferMeta func : OriginReduceInferMeta
kernel : kernel :
func : mean func : mean
backward : mean_grad backward : mean_grad
......
...@@ -2146,7 +2146,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x, ...@@ -2146,7 +2146,7 @@ void MaxPoolWithIndexInferMeta(const MetaTensor& x,
} }
void MeanAllInferMeta(const MetaTensor& x, MetaTensor* out) { void MeanAllInferMeta(const MetaTensor& x, MetaTensor* out) {
out->set_dims(phi::make_ddim({1})); out->set_dims(phi::make_ddim({}));
out->set_dtype(x.dtype()); out->set_dtype(x.dtype());
out->set_layout(x.layout()); out->set_layout(x.layout());
} }
...@@ -3050,29 +3050,19 @@ DDim ReduceInferDim(const MetaTensor& x, ...@@ -3050,29 +3050,19 @@ DDim ReduceInferDim(const MetaTensor& x,
reduce_all = reduce_all || full_dim; reduce_all = reduce_all || full_dim;
std::vector<int64_t> out_dim_vector; std::vector<int64_t> out_dim_vector;
if (keep_dim) { for (int64_t i = 0; i < x_rank; ++i) {
for (int64_t i = 0; i < x_rank; ++i) { if (reduce_all || dims_set.find(i) != dims_set.end()) {
if (reduce_all || dims_set.find(i) != dims_set.end()) { if (keep_dim) {
out_dim_vector.push_back(1); out_dim_vector.push_back(1);
} else { } else {
out_dim_vector.push_back(x.dims().at(i));
}
}
} else {
for (int64_t i = 0; i < x_rank; ++i) {
if (reduce_all || dims_set.find(i) != dims_set.end()) {
continue; continue;
} else {
out_dim_vector.push_back(x.dims().at(i));
} }
} } else {
out_dim_vector.push_back(x.dims().at(i));
if (x_rank > 0 && out_dim_vector.size() == 0) {
out_dim_vector.push_back(1);
} }
} }
DDim out_dim = phi::make_ddim(out_dim_vector);
DDim out_dim = phi::make_ddim(out_dim_vector);
return out_dim; return out_dim;
} }
...@@ -3086,14 +3076,14 @@ DDim ReduceInferDimForIntArrayAxis(const MetaTensor& x, ...@@ -3086,14 +3076,14 @@ DDim ReduceInferDimForIntArrayAxis(const MetaTensor& x,
if (keep_dim) { if (keep_dim) {
vec_dim = std::vector<int64_t>(x.dims().size(), 1); vec_dim = std::vector<int64_t>(x.dims().size(), 1);
} else { } else {
vec_dim = {1}; vec_dim = {};
} }
} else { } else {
if (keep_dim) { if (keep_dim) {
vec_dim = std::vector<int64_t>(x.dims().size(), -1); vec_dim = std::vector<int64_t>(x.dims().size(), -1);
} else { } else {
auto x_rank = static_cast<size_t>(x.dims().size()); auto x_rank = static_cast<size_t>(x.dims().size());
if (vec_axis.size() >= x_rank) { if (vec_axis.size() > x_rank) {
vec_dim = {-1}; vec_dim = {-1};
} else { } else {
vec_dim = std::vector<int64_t>(x.dims().size() - vec_axis.size(), -1); vec_dim = std::vector<int64_t>(x.dims().size() - vec_axis.size(), -1);
...@@ -3125,22 +3115,6 @@ void ReduceInferMetaBase(const MetaTensor& x, ...@@ -3125,22 +3115,6 @@ void ReduceInferMetaBase(const MetaTensor& x,
out->set_layout(x.layout()); out->set_layout(x.layout());
} }
void ReduceIntArrayAxisInferMetaBase(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
bool reduce_all,
MetaTensor* out,
MetaConfig config) {
if (config.is_runtime || !axis.FromTensor()) {
ReduceInferMetaBase(x, axis.GetData(), keep_dim, reduce_all, out);
} else {
DDim out_dim = ReduceInferDimForIntArrayAxis(x, axis, keep_dim, reduce_all);
out->set_dims(out_dim);
out->set_dtype(x.dtype());
out->set_layout(x.layout());
}
}
void ReduceIntArrayAxisInferMeta(const MetaTensor& x, void ReduceIntArrayAxisInferMeta(const MetaTensor& x,
const IntArray& axis, const IntArray& axis,
bool keep_dim, bool keep_dim,
...@@ -3153,6 +3127,23 @@ void ReduceIntArrayAxisInferMeta(const MetaTensor& x, ...@@ -3153,6 +3127,23 @@ void ReduceIntArrayAxisInferMeta(const MetaTensor& x,
ReduceIntArrayAxisInferMetaBase(x, axis, keep_dim, reduce_all, out, config); ReduceIntArrayAxisInferMetaBase(x, axis, keep_dim, reduce_all, out, config);
} }
void ReduceIntArrayAxisInferMetaBase(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
bool reduce_all,
MetaTensor* out,
MetaConfig config) {
DDim out_dim;
if (config.is_runtime || !axis.FromTensor()) {
out_dim = ReduceInferDim(x, axis.GetData(), keep_dim, reduce_all);
} else {
out_dim = ReduceInferDimForIntArrayAxis(x, axis, keep_dim, reduce_all);
}
out->set_dims(out_dim);
out->set_dtype(x.dtype());
out->set_layout(x.layout());
}
void ReduceScatterInferMeta(const MetaTensor& x, int nranks, MetaTensor* out) { void ReduceScatterInferMeta(const MetaTensor& x, int nranks, MetaTensor* out) {
auto dim = x.dims(); auto dim = x.dims();
if (dim[0] > 0 || dim[0] < -1) { if (dim[0] > 0 || dim[0] < -1) {
...@@ -3951,6 +3942,105 @@ void StridedSliceInferMeta(const MetaTensor& x, ...@@ -3951,6 +3942,105 @@ void StridedSliceInferMeta(const MetaTensor& x,
x, axes, starts, ends, strides, infer_flags, decrease_axis, out, config); x, axes, starts, ends, strides, infer_flags, decrease_axis, out, config);
} }
// TODO(zhouwei): OriginReduceInferDim doesn't support 0D, remove in future
DDim OriginReduceInferDim(const MetaTensor& x,
const std::vector<int64_t>& axis,
bool keep_dim,
bool reduce_all) {
auto x_rank = x.dims().size();
std::vector<int64_t> formated_axis = axis;
for (size_t i = 0; i < axis.size(); ++i) {
if (x_rank == 0) {
PADDLE_ENFORCE_EQ(
axis[i] == 0 || axis[i] == -1,
true,
phi::errors::InvalidArgument(
"When input 0D Tensor, the axis can only be -1, 0, None or []"));
} else {
PADDLE_ENFORCE_LT(axis[i],
x_rank,
errors::InvalidArgument(
"The reduce dim index %d should be in the "
"range [ -dimension(X), dimension(X) ) "
"which dimesion = %d. But received dim index = %d.",
i,
x_rank,
axis[i]));
PADDLE_ENFORCE_GE(axis[i],
-x_rank,
errors::InvalidArgument(
"The reduce dim index %d should be in the "
"range [ -dimension(X), dimension(X) ) "
"which dimesion = %d. But received dim index = %d.",
i,
x_rank,
axis[i]));
}
if (axis[i] < 0) {
formated_axis[i] = axis[i] + x_rank;
}
}
bool full_dim = true;
std::set<int64_t> dims_set(formated_axis.begin(), formated_axis.end());
for (int64_t i = 0; i < x_rank; ++i) {
if (dims_set.find(i) == dims_set.end()) {
full_dim = false;
break;
}
}
reduce_all = reduce_all || full_dim;
std::vector<int64_t> out_dim_vector;
for (int64_t i = 0; i < x_rank; ++i) {
if (reduce_all || dims_set.find(i) != dims_set.end()) {
if (keep_dim) {
out_dim_vector.push_back(1);
} else {
continue;
}
} else {
out_dim_vector.push_back(x.dims().at(i));
}
}
if (x_rank > 0 && out_dim_vector.size() == 0) {
out_dim_vector.push_back(1);
}
DDim out_dim = phi::make_ddim(out_dim_vector);
return out_dim;
}
// TODO(zhouwei): OriginReduceInferDim doesn't support 0D, remove in future
DDim OriginReduceInferDimForIntArrayAxis(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
bool reduce_all) {
std::vector<int64_t> vec_axis = axis.GetData();
std::vector<int64_t> vec_dim;
if (reduce_all) {
if (keep_dim) {
vec_dim = std::vector<int64_t>(x.dims().size(), 1);
} else {
vec_dim = {1};
}
} else {
if (keep_dim) {
vec_dim = std::vector<int64_t>(x.dims().size(), -1);
} else {
auto x_rank = static_cast<size_t>(x.dims().size());
if (vec_axis.size() >= x_rank) {
vec_dim = {-1};
} else {
vec_dim = std::vector<int64_t>(x.dims().size() - vec_axis.size(), -1);
}
}
}
return phi::make_ddim(vec_dim);
}
/* Why not use SumRawInferMeta directly? /* Why not use SumRawInferMeta directly?
Because we need make InferMetaFunction's args follow the design of Because we need make InferMetaFunction's args follow the design of
ops.yaml ops.yaml
...@@ -3977,9 +4067,10 @@ void SumRawInferMeta(const MetaTensor& x, ...@@ -3977,9 +4067,10 @@ void SumRawInferMeta(const MetaTensor& x,
MetaConfig config) { MetaConfig config) {
DDim out_dim; DDim out_dim;
if (config.is_runtime || !axis.FromTensor()) { if (config.is_runtime || !axis.FromTensor()) {
out_dim = ReduceInferDim(x, axis.GetData(), keep_dim, reduce_all); out_dim = OriginReduceInferDim(x, axis.GetData(), keep_dim, reduce_all);
} else { } else {
out_dim = ReduceInferDimForIntArrayAxis(x, axis, keep_dim, reduce_all); out_dim =
OriginReduceInferDimForIntArrayAxis(x, axis, keep_dim, reduce_all);
} }
DataType out_dtype; DataType out_dtype;
...@@ -3998,6 +4089,38 @@ void SumRawInferMeta(const MetaTensor& x, ...@@ -3998,6 +4089,38 @@ void SumRawInferMeta(const MetaTensor& x,
out->set_layout(x.layout()); out->set_layout(x.layout());
} }
// TODO(zhouwei): OriginReduce doesn't support 0D, remove in future
void OriginReduceInferMeta(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
MetaTensor* out,
MetaConfig config) {
bool reduce_all = false;
if (axis.size() == 0) {
reduce_all = true;
}
OriginReduceInferMetaBase(x, axis, keep_dim, reduce_all, out, config);
}
// TODO(zhouwei): OriginReduce doesn't support 0D, remove in future
void OriginReduceInferMetaBase(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
bool reduce_all,
MetaTensor* out,
MetaConfig config) {
DDim out_dim;
if (config.is_runtime || !axis.FromTensor()) {
out_dim = OriginReduceInferDim(x, axis.GetData(), keep_dim, reduce_all);
} else {
out_dim =
OriginReduceInferDimForIntArrayAxis(x, axis, keep_dim, reduce_all);
}
out->set_dims(out_dim);
out->set_dtype(x.dtype());
out->set_layout(x.layout());
}
void SvdInferMeta(const MetaTensor& x, void SvdInferMeta(const MetaTensor& x,
bool full_matrices, bool full_matrices,
MetaTensor* u, MetaTensor* u,
......
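
The ReduceInferDim rework above amounts to a simple output-shape rule: a reduced axis is either kept as 1 (keep_dim=True) or dropped entirely, with no padding back to shape [1]. An illustrative Python sketch of that rule (a simplification for reference, not the phi implementation; `reduce_out_shape` is a hypothetical helper):

```python
def reduce_out_shape(x_shape, axes, keep_dim, reduce_all):
    """Shape rule mirroring the new ReduceInferDim logic (illustrative only)."""
    rank = len(x_shape)
    axes = {a + rank if a < 0 else a for a in axes}   # normalize negative axes
    reduce_all = reduce_all or axes == set(range(rank))
    out = []
    for i, dim in enumerate(x_shape):
        if reduce_all or i in axes:
            if keep_dim:
                out.append(1)
            # else: the reduced dim is dropped; no padding back to [1]
        else:
            out.append(dim)
    return out

assert reduce_out_shape([3, 5], [], False, True) == []       # full reduce -> 0-D
assert reduce_out_shape([3, 5], [0], True, False) == [1, 5]  # keep_dim keeps a 1
assert reduce_out_shape([5], [0], False, False) == []        # 1-D reduced on axis 0 -> 0-D
```

OriginReduceInferDim / OriginReduceInferMeta, by contrast, retain the old padding to [1] for ops not yet migrated to 0-D outputs, hence the TODO comments marking them for removal.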
...@@ -572,6 +572,19 @@ void SumRawInferMeta(const MetaTensor& x, ...@@ -572,6 +572,19 @@ void SumRawInferMeta(const MetaTensor& x,
MetaTensor* out, MetaTensor* out,
MetaConfig config = MetaConfig()); MetaConfig config = MetaConfig());
void OriginReduceInferMeta(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
MetaTensor* out,
MetaConfig config = MetaConfig());
void OriginReduceInferMetaBase(const MetaTensor& x,
const IntArray& axis,
bool keep_dim,
bool reduce_all,
MetaTensor* out,
MetaConfig config = MetaConfig());
void SvdInferMeta(const MetaTensor& x, void SvdInferMeta(const MetaTensor& x,
bool full_matrices, bool full_matrices,
MetaTensor* u, MetaTensor* u,
......
...@@ -32,10 +32,12 @@ void MeanAllGradKernel(const Context& dev_ctx, ...@@ -32,10 +32,12 @@ void MeanAllGradKernel(const Context& dev_ctx,
out_grad.numel())); out_grad.numel()));
dev_ctx.template Alloc<T>(x_grad); dev_ctx.template Alloc<T>(x_grad);
T ig_size = static_cast<T>(x_grad->numel()); T x_numel = static_cast<T>(x_grad->numel());
Eigen::DSizes<int, 1> bcast(static_cast<int>(ig_size)); Eigen::DSizes<int, 1> bcast(static_cast<int>(x_numel));
EigenVector<T>::Flatten(*x_grad).device(*dev_ctx.eigen_device()) = auto eigen_x = EigenVector<T>::Flatten(*x_grad);
(EigenVector<T>::From(out_grad) / ig_size).broadcast(bcast); auto eigen_dout = EigenVector<T>::Flatten(out_grad);
eigen_x.device(*dev_ctx.eigen_device()) =
(eigen_dout / x_numel).broadcast(bcast);
} }
} // namespace phi } // namespace phi
......
...@@ -105,19 +105,19 @@ inline DDim GetOutputSqueezeShape(const std::vector<int> squeeze_dims, ...@@ -105,19 +105,19 @@ inline DDim GetOutputSqueezeShape(const std::vector<int> squeeze_dims,
inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims, inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,
const DDim& in_dims) { const DDim& in_dims) {
int output_size = in_dims.size() + static_cast<int>(unsqz_dims.size()); int output_rank = in_dims.size() + static_cast<int>(unsqz_dims.size());
int cur_output_size = in_dims.size(); int cur_output_rank = in_dims.size();
std::vector<int64_t> output_shape(output_size, 0); std::vector<int64_t> output_shape(output_rank, 0);
// Validity Check: rank range. // Validity Check: rank range.
PADDLE_ENFORCE_LE( PADDLE_ENFORCE_LE(
output_size, output_rank,
6, 6,
phi::errors::InvalidArgument("The output " phi::errors::InvalidArgument("The output "
"tensor's rank should be less than 6.")); "tensor's rank should be less than 6."));
for (int axis : unsqz_dims) { for (int axis : unsqz_dims) {
int cur = axis < 0 ? axis + cur_output_size + 1 : axis; int cur = axis < 0 ? axis + cur_output_rank + 1 : axis;
// Vaildity Check: the axis bound // Vaildity Check: the axis bound
PADDLE_ENFORCE_GE( PADDLE_ENFORCE_GE(
cur, cur,
...@@ -125,12 +125,12 @@ inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims, ...@@ -125,12 +125,12 @@ inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,
phi::errors::InvalidArgument("The insert dimension value should " phi::errors::InvalidArgument("The insert dimension value should "
"not be less than 0")); "not be less than 0"));
PADDLE_ENFORCE_LE(cur, PADDLE_ENFORCE_LE(cur,
cur_output_size, cur_output_rank,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"The insert dimension value shoule not be larger " "The insert dimension value shoule not be larger "
"than the dimension size of input tensor")); "than the dimension size of input tensor"));
// Move old axis, and insert new axis // Move old axis, and insert new axis
for (int i = cur_output_size; i >= cur; --i) { for (int i = cur_output_rank; i >= cur; --i) {
if (output_shape[i] == 1) { if (output_shape[i] == 1) {
// Move axis // Move axis
output_shape[i + 1] = 1; output_shape[i + 1] = 1;
...@@ -139,11 +139,11 @@ inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims, ...@@ -139,11 +139,11 @@ inline DDim GetUnsqueezeShape(const std::vector<int64_t> unsqz_dims,
} }
output_shape[cur] = 1; output_shape[cur] = 1;
// Add the output size. // Add the output size.
cur_output_size++; cur_output_rank++;
} }
// Make output shape // Make output shape
for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) { for (int in_idx = 0, out_idx = 0; out_idx < output_rank; ++out_idx) {
if (output_shape[out_idx] == 0) { if (output_shape[out_idx] == 0) {
output_shape[out_idx] = in_dims[in_idx++]; output_shape[out_idx] = in_dims[in_idx++];
} }
......
...@@ -102,8 +102,10 @@ void ReduceKernel(const Context& dev_ctx, ...@@ -102,8 +102,10 @@ void ReduceKernel(const Context& dev_ctx,
reduction_p->execute(astream, reduction_args); reduction_p->execute(astream, reduction_args);
astream.wait(); astream.wait();
out->set_mem_desc( const auto reshape_dims = out->dims().size() != 0
dst_memory_p->get_desc().reshape(vectorize<int64_t>(out->dims()))); ? vectorize<int64_t>(out->dims())
: std::vector<int64_t>{1};
out->set_mem_desc(dst_memory_p->get_desc().reshape(reshape_dims));
} }
} }
......
...@@ -13,7 +13,8 @@ ...@@ -13,7 +13,8 @@
# limitations under the License # limitations under the License
from collections import OrderedDict from collections import OrderedDict
from functools import reduce
import numpy as np
import paddle import paddle
from paddle.utils.flops import flops from paddle.utils.flops import flops
...@@ -807,7 +808,7 @@ class CommOpCost(OpCost): ...@@ -807,7 +808,7 @@ class CommOpCost(OpCost):
factor = 8 factor = 8
else: else:
raise ValueError(f"Unsupported comm dtype {dtype}") raise ValueError(f"Unsupported comm dtype {dtype}")
comm_count = reduce(lambda x, y: x * y, shape) * factor comm_count = int(np.prod(shape)) * factor
self._comm_count = comm_count self._comm_count = comm_count
return self._comm_count return self._comm_count
......
...@@ -242,7 +242,7 @@ def unscale_method(self, optimizer): ...@@ -242,7 +242,7 @@ def unscale_method(self, optimizer):
paddle.distributed.all_reduce( paddle.distributed.all_reduce(
is_found_inf, op=paddle.distributed.ReduceOp.MAX, group=None is_found_inf, op=paddle.distributed.ReduceOp.MAX, group=None
) )
self._found_inf = is_found_inf.numpy()[0] self._found_inf = int(is_found_inf)
class MixPrecisionScaler: class MixPrecisionScaler:
......
...@@ -179,7 +179,7 @@ def monkey_patch_math_varbase(): ...@@ -179,7 +179,7 @@ def monkey_patch_math_varbase():
@property @property
def _size_(var): def _size_(var):
return np.prod(var.shape) return int(np.prod(var.shape))
@property @property
def _T_(var): def _T_(var):
......
...@@ -286,7 +286,7 @@ class TestDistRunnerBase: ...@@ -286,7 +286,7 @@ class TestDistRunnerBase:
fetch_list=[avg_cost.name], fetch_list=[avg_cost.name],
feed=feeder.feed(get_data()), feed=feeder.feed(get_data()),
) )
out_losses.append(loss[0]) out_losses.append(float(loss))
print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "run step %d finished" % i)
print_to_err(type(self).__name__, "trainer run finished") print_to_err(type(self).__name__, "trainer run finished")
print_to_err(type(self).__name__, f"dist losses: {out_losses}") print_to_err(type(self).__name__, f"dist losses: {out_losses}")
...@@ -382,7 +382,7 @@ class TestDistRunnerBase: ...@@ -382,7 +382,7 @@ class TestDistRunnerBase:
fetch_list=[avg_cost.name], fetch_list=[avg_cost.name],
feed=feeder.feed(get_data()), feed=feeder.feed(get_data()),
) )
out_losses.append(loss[0]) out_losses.append(float(loss))
print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "run step %d finished" % i)
print_to_err(type(self).__name__, "trainer run finished") print_to_err(type(self).__name__, "trainer run finished")
...@@ -619,7 +619,7 @@ class TestDistRunnerBase: ...@@ -619,7 +619,7 @@ class TestDistRunnerBase:
(loss,) = exe.run( (loss,) = exe.run(
binary, fetch_list=[avg_cost.name], feed=feeder.feed(get_data()) binary, fetch_list=[avg_cost.name], feed=feeder.feed(get_data())
) )
out_losses.append(loss[0]) out_losses.append(float(loss))
print_to_err(type(self).__name__, "run step %d finished" % i) print_to_err(type(self).__name__, "run step %d finished" % i)
if lr_scheduler is not None: if lr_scheduler is not None:
lr_scheduler.step() lr_scheduler.step()
......
...@@ -31,17 +31,17 @@ class TestFunctionalL1Loss(unittest.TestCase): ...@@ -31,17 +31,17 @@ class TestFunctionalL1Loss(unittest.TestCase):
dy_result = paddle.nn.functional.l1_loss(input, label) dy_result = paddle.nn.functional.l1_loss(input, label)
expected = np.mean(np.abs(self.input_np - self.label_np)) expected = np.mean(np.abs(self.input_np - self.label_np))
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [1]) self.assertEqual(dy_result.shape, [])
dy_result = paddle.nn.functional.l1_loss(input, label, reduction='sum') dy_result = paddle.nn.functional.l1_loss(input, label, reduction='sum')
expected = np.sum(np.abs(self.input_np - self.label_np)) expected = np.sum(np.abs(self.input_np - self.label_np))
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [1]) self.assertEqual(dy_result.shape, [1])
dy_result = paddle.nn.functional.l1_loss(input, label, reduction='none') dy_result = paddle.nn.functional.l1_loss(input, label, reduction='none')
expected = np.abs(self.input_np - self.label_np) expected = np.abs(self.input_np - self.label_np)
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [10, 10, 5]) self.assertEqual(dy_result.shape, [10, 10, 5])
def run_static(self, use_gpu=False): def run_static(self, use_gpu=False):
input = paddle.static.data( input = paddle.static.data(
...@@ -119,19 +119,19 @@ class TestClassL1Loss(unittest.TestCase): ...@@ -119,19 +119,19 @@ class TestClassL1Loss(unittest.TestCase):
dy_result = l1_loss(input, label) dy_result = l1_loss(input, label)
expected = np.mean(np.abs(self.input_np - self.label_np)) expected = np.mean(np.abs(self.input_np - self.label_np))
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [1]) self.assertEqual(dy_result.shape, [])
l1_loss = paddle.nn.loss.L1Loss(reduction='sum') l1_loss = paddle.nn.loss.L1Loss(reduction='sum')
dy_result = l1_loss(input, label) dy_result = l1_loss(input, label)
expected = np.sum(np.abs(self.input_np - self.label_np)) expected = np.sum(np.abs(self.input_np - self.label_np))
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [1]) self.assertEqual(dy_result.shape, [1])
l1_loss = paddle.nn.loss.L1Loss(reduction='none') l1_loss = paddle.nn.loss.L1Loss(reduction='none')
dy_result = l1_loss(input, label) dy_result = l1_loss(input, label)
expected = np.abs(self.input_np - self.label_np) expected = np.abs(self.input_np - self.label_np)
np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05) np.testing.assert_allclose(dy_result.numpy(), expected, rtol=1e-05)
self.assertTrue(dy_result.shape, [10, 10, 5]) self.assertEqual(dy_result.shape, [10, 10, 5])
def run_static(self, use_gpu=False): def run_static(self, use_gpu=False):
input = paddle.static.data( input = paddle.static.data(
......
...@@ -212,7 +212,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ...@@ -212,7 +212,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
adam_test.set_dict(opt_state) adam_test.set_dict(opt_state)
self.assertEqual( self.assertEqual(
adam_test._learning_rate.best_loss, adam_test._learning_rate.best_loss,
adam3._learning_rate.best_loss.numpy()[0], adam3._learning_rate.best_loss,
"best_loss is different before and after set_dict", "best_loss is different before and after set_dict",
) )
self.assertEqual( self.assertEqual(
...@@ -275,7 +275,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ...@@ -275,7 +275,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
t = lr() t = lr()
np.testing.assert_allclose( np.testing.assert_allclose(
t.numpy()[0].item(), right_result[i], rtol=1e-05 t.numpy().item(), right_result[i], rtol=1e-05
) )
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
...@@ -342,7 +342,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ...@@ -342,7 +342,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
right_result = step_decay( right_result = step_decay(
epoch, learning_rate, step_size, decay_rate epoch, learning_rate, step_size, decay_rate
) )
fluid_result = scheduler().numpy()[0] fluid_result = scheduler().numpy().item()
scheduler.epoch() scheduler.epoch()
self.assertAlmostEqual( self.assertAlmostEqual(
right_result, right_result,
...@@ -371,7 +371,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase): ...@@ -371,7 +371,7 @@ class TestLearningRateDecayDygraph(unittest.TestCase):
for epoch in range(30): for epoch in range(30):
right_result = lambda_decay(epoch, learning_rate, lr_lambda) right_result = lambda_decay(epoch, learning_rate, lr_lambda)
fluid_result = scheduler().numpy()[0] fluid_result = scheduler().numpy().item()
scheduler.epoch() scheduler.epoch()
self.assertAlmostEqual( self.assertAlmostEqual(
right_result, right_result,
......
...@@ -208,7 +208,7 @@ class TestReduceOnPlateauDecay: ...@@ -208,7 +208,7 @@ class TestReduceOnPlateauDecay:
self.assertEqual( self.assertEqual(
scheduler.cooldown_counter, scheduler1.cooldown_counter scheduler.cooldown_counter, scheduler1.cooldown_counter
) )
self.assertEqual(scheduler.best.numpy()[0], scheduler1.best) self.assertEqual(scheduler.best, scheduler1.best)
self.assertEqual(scheduler.num_bad_epochs, scheduler1.num_bad_epochs) self.assertEqual(scheduler.num_bad_epochs, scheduler1.num_bad_epochs)
self.assertEqual(scheduler.last_epoch, scheduler1.last_epoch) self.assertEqual(scheduler.last_epoch, scheduler1.last_epoch)
self.assertEqual(scheduler.last_lr, scheduler1.last_lr) self.assertEqual(scheduler.last_lr, scheduler1.last_lr)
......
...@@ -197,6 +197,7 @@ class TestReduceAPI(unittest.TestCase): ...@@ -197,6 +197,7 @@ class TestReduceAPI(unittest.TestCase):
out_empty_list = api(x, []) out_empty_list = api(x, [])
self.assertEqual(out_empty_list, out) self.assertEqual(out_empty_list, out)
self.assertEqual(out_empty_list.shape, [])
if x.grad is not None: if x.grad is not None:
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
...@@ -218,6 +219,44 @@ class TestReduceAPI(unittest.TestCase): ...@@ -218,6 +219,44 @@ class TestReduceAPI(unittest.TestCase):
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
np.testing.assert_allclose(x.grad.numpy(), np.array(3.0)) np.testing.assert_allclose(x.grad.numpy(), np.array(3.0))
if api in [
paddle.sum,
paddle.mean,
paddle.nanmean,
paddle.nansum,
]:
return
# 2) x is ND, reduce to 0D
if api in [paddle.all, paddle.any]:
x = paddle.randint(0, 2, [3, 5]).astype('bool')
else:
x = paddle.rand([3, 5])
x.stop_gradient = False
out = api(x, None)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
if x.grad is not None:
self.assertEqual(out.grad.shape, [])
self.assertEqual(x.grad.shape, [3, 5])
# 3) x is 1D, axis=0, reduce to 0D
if api in [paddle.all, paddle.any]:
x = paddle.randint(0, 2, [5]).astype('bool')
else:
x = paddle.rand([5])
x.stop_gradient = False
out = api(x, 0)
out.retain_grads()
out.backward()
self.assertEqual(out.shape, [])
if x.grad is not None:
self.assertEqual(out.grad.shape, [])
self.assertEqual(x.grad.shape, [5])
paddle.enable_static() paddle.enable_static()
def test_static_reduce(self): def test_static_reduce(self):
...@@ -262,6 +301,53 @@ class TestReduceAPI(unittest.TestCase): ...@@ -262,6 +301,53 @@ class TestReduceAPI(unittest.TestCase):
np.testing.assert_allclose(res[2], np.array(1.0)) np.testing.assert_allclose(res[2], np.array(1.0))
np.testing.assert_allclose(res[3], np.array(1.0)) np.testing.assert_allclose(res[3], np.array(1.0))
if api in [
paddle.sum,
paddle.mean,
paddle.nanmean,
paddle.nansum,
]:
return
# 2) x is ND, reduce to 0D
if api in [paddle.all, paddle.any]:
x = paddle.randint(0, 2, [3, 5]).astype('bool')
else:
x = paddle.rand([3, 5])
x = paddle.rand([3, 5])
x.stop_gradient = False
out = api(x, None)
paddle.static.append_backward(out)
fetch_list = [out]
if block.has_var(x.grad_name):
fetch_list.extend([out.grad_name, x.grad_name])
res = exe.run(main_prog, fetch_list=fetch_list)
self.assertEqual(res[0].shape, ())
if len(res) > 1:
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, (3, 5))
# 3) x is 1D, axis=0, reduce to 0D
if api in [paddle.all, paddle.any]:
x = paddle.randint(0, 2, [5]).astype('bool')
else:
x = paddle.rand([5])
x.stop_gradient = False
out = api(x, 0)
paddle.static.append_backward(out)
fetch_list = [out]
if block.has_var(x.grad_name):
fetch_list.extend([out.grad_name, x.grad_name])
res = exe.run(main_prog, fetch_list=fetch_list)
self.assertEqual(res[0].shape, ())
if len(res) > 1:
self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, (5,))
paddle.disable_static() paddle.disable_static()
...@@ -1321,8 +1407,8 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1321,8 +1407,8 @@ class TestSundryAPI(unittest.TestCase):
def test_shape(self): def test_shape(self):
out = paddle.shape(self.x) out = paddle.shape(self.x)
self.assertEqual(out.shape, [0])
np.testing.assert_array_equal(out.numpy(), np.array([])) np.testing.assert_array_equal(out.numpy(), np.array([]))
self.assertEqual(out.shape, [0])
def test_equal_scalar(self): def test_equal_scalar(self):
x = paddle.rand([]) x = paddle.rand([])
...@@ -1382,6 +1468,16 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1382,6 +1468,16 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(out.grad.shape, []) self.assertEqual(out.grad.shape, [])
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
x1 = paddle.uniform([], None, -10, 10)
x1.stop_gradient = False
out1 = paddle.clip(x1, paddle.full([], 5.0), paddle.full([], 5.0))
out1.retain_grads()
out1.backward()
self.assertEqual(out1.shape, [])
self.assertEqual(out1.grad.shape, [])
self.assertEqual(x1.grad.shape, [])
def test_increment(self): def test_increment(self):
x = paddle.rand([]) x = paddle.rand([])
x.stop_gradient = False x.stop_gradient = False
...@@ -1614,6 +1710,11 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1614,6 +1710,11 @@ class TestSundryAPI(unittest.TestCase):
self.assertEqual(out.grad.shape, []) self.assertEqual(out.grad.shape, [])
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
def test_scale_(self):
x = paddle.rand([])
out = x.scale_(scale=2.0, bias=1.0)
self.assertEqual(out.shape, [])
def test_floor_divide(self): def test_floor_divide(self):
# 1-d // 0-d # 1-d // 0-d
x = paddle.to_tensor([1, -2, 3], dtype="int64") x = paddle.to_tensor([1, -2, 3], dtype="int64")
...@@ -1946,32 +2047,6 @@ class TestSundryAPI(unittest.TestCase): ...@@ -1946,32 +2047,6 @@ class TestSundryAPI(unittest.TestCase):
# check grad shape with 1D repeats # check grad shape with 1D repeats
self.assertEqual(x.grad.shape, []) self.assertEqual(x.grad.shape, [])
def test_sigmoid_focal_loss(self):
logit = paddle.to_tensor(
[[0.97, 0.91, 0.03], [0.55, 0.43, 0.71]],
dtype='float32',
stop_gradient=False,
)
logit.retain_grads()
label = paddle.to_tensor(
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype='float32'
)
fg_num_0 = paddle.full([], 2.0)
fg_num_1 = paddle.full([1], 2.0)
out0 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_0)
out1 = F.sigmoid_focal_loss(logit, label, normalizer=fg_num_1)
out0.retain_grads()
np.testing.assert_array_equal(
out0.numpy(),
out1.numpy(),
)
out0.backward()
self.assertEqual(out0.grad.shape, [1])
self.assertEqual(logit.grad.shape, [2, 3])
def test_allclose(self): def test_allclose(self):
# 1) x is 0D # 1) x is 0D
x = paddle.full([], 0.5) x = paddle.full([], 0.5)
...@@ -2454,6 +2529,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2454,6 +2529,7 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(res[3].shape, ()) self.assertEqual(res[3].shape, ())
self.assertEqual(res[3], 1.0) self.assertEqual(res[3], 1.0)
@prog_scope()
def test_argmin(self): def test_argmin(self):
# 1) x is 0D # 1) x is 0D
x = paddle.rand([]) x = paddle.rand([])
...@@ -2998,14 +3074,33 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -2998,14 +3074,33 @@ class TestSundryAPIStatic(unittest.TestCase):
out = paddle.clip(x, -5, 5) out = paddle.clip(x, -5, 5)
paddle.static.append_backward(out) paddle.static.append_backward(out)
x1 = paddle.uniform([], None, -10, 10)
x1.stop_gradient = False
out1 = paddle.clip(x1, paddle.full([], 5.0), paddle.full([], 5.0))
paddle.static.append_backward(out1)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run( res = self.exe.run(
prog, fetch_list=[x, out, x.grad_name, out.grad_name] prog,
fetch_list=[
x,
out,
x.grad_name,
out.grad_name,
x1,
out1,
x1.grad_name,
out1.grad_name,
],
) )
self.assertEqual(res[0].shape, ()) self.assertEqual(res[0].shape, ())
self.assertEqual(res[1].shape, ()) self.assertEqual(res[1].shape, ())
self.assertEqual(res[2].shape, ()) self.assertEqual(res[2].shape, ())
self.assertEqual(res[3].shape, ()) self.assertEqual(res[3].shape, ())
self.assertEqual(res[4].shape, ())
self.assertEqual(res[5].shape, ())
self.assertEqual(res[6].shape, ())
self.assertEqual(res[7].shape, ())
@prog_scope() @prog_scope()
def test_increment(self): def test_increment(self):
...@@ -3340,6 +3435,7 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -3340,6 +3435,7 @@ class TestSundryAPIStatic(unittest.TestCase):
self.assertEqual(out2.shape, ()) self.assertEqual(out2.shape, ())
self.assertEqual(out3.shape, ()) self.assertEqual(out3.shape, ())
@prog_scope()
def test_add_n(self): def test_add_n(self):
x1 = paddle.rand([]) x1 = paddle.rand([])
x1.stop_gradient = False x1.stop_gradient = False
...@@ -3962,15 +4058,14 @@ class TestSundryAPIStatic(unittest.TestCase): ...@@ -3962,15 +4058,14 @@ class TestSundryAPIStatic(unittest.TestCase):
np.testing.assert_array_equal(res[0], np.array(2)) np.testing.assert_array_equal(res[0], np.array(2))
@prog_scope() @prog_scope()
def _test_shape(self): def test_shape(self):
x = paddle.full([], 0.5) x = paddle.full([], 0.5)
out = paddle.shape(x) out = paddle.shape(x)
prog = paddle.static.default_main_program() prog = paddle.static.default_main_program()
res = self.exe.run(prog, fetch_list=[out]) res = self.exe.run(prog, fetch_list=[out])
# 0-Size should be [ np.array([]) ], its [None] now
self.assertEqual(res[0].shape, (0))
np.testing.assert_array_equal(res[0], np.array([])) np.testing.assert_array_equal(res[0], np.array([]))
self.assertEqual(res[0].shape, (0,))
def test_broadcast_tensors(self): def test_broadcast_tensors(self):
# 1) x is 0D, y is 0D # 1) x is 0D, y is 0D
...@@ -4725,5 +4820,75 @@ class TestDistribution(unittest.TestCase): ...@@ -4725,5 +4820,75 @@ class TestDistribution(unittest.TestCase):
# self.assertEqual(d.entropy().shape, []) # self.assertEqual(d.entropy().shape, [])
class TestLossAPI(unittest.TestCase):
def test_sigmoid_focal_loss(self):
logit = paddle.to_tensor(
[[0.97, 0.91, 0.03], [0.55, 0.43, 0.71]],
dtype='float32',
stop_gradient=False,
)
logit.retain_grads()
label = paddle.to_tensor(
[[1.0, 0.0, 0.0], [0.0, 1.0, 0.0]], dtype='float32'
)
fg_num_0 = paddle.full([], 2.0)
fg_num_1 = paddle.full([1], 2.0)
out0 = F.sigmoid_focal_loss(
logit, label, normalizer=fg_num_0, reduction='mean'
)
out1 = F.sigmoid_focal_loss(
logit, label, normalizer=fg_num_1, reduction='mean'
)
out0.retain_grads()
np.testing.assert_array_equal(
out0.numpy(),
out1.numpy(),
)
out0.backward()
self.assertEqual(out0.shape, [])
self.assertEqual(out1.shape, [])
self.assertEqual(out0.grad.shape, [])
self.assertEqual(logit.grad.shape, [2, 3])
class TestLossAPIStatic(unittest.TestCase):
def setUp(self):
paddle.enable_static()
self.exe = paddle.static.Executor()
@prog_scope()
def test_sigmoid_focal_loss(self):
logit = paddle.rand([2, 3])
logit.stop_gradient = False
label = paddle.randint(0, 1, [2, 3]).astype('float32')
label.stop_gradient = False
fg_num_0 = paddle.full([], 2.0)
fg_num_1 = paddle.full([1], 2.0)
out0 = F.sigmoid_focal_loss(
logit, label, normalizer=fg_num_0, reduction='mean'
)
out1 = F.sigmoid_focal_loss(
logit, label, normalizer=fg_num_1, reduction='mean'
)
paddle.static.append_backward(out0.sum())
prog = paddle.static.default_main_program()
res = self.exe.run(
prog, fetch_list=[out0, out1, out0.grad_name, logit.grad_name]
)
np.testing.assert_allclose(res[0], res[1])
# because static use paddle.mean
# self.assertEqual(res[0].shape, ())
# self.assertEqual(res[1].shape, ())
# self.assertEqual(res[2].shape, ())
self.assertEqual(res[3].shape, (2, 3))
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
...@@ -81,8 +81,13 @@ class ProgressBar: ...@@ -81,8 +81,13 @@ class ProgressBar:
for i, (k, val) in enumerate(values): for i, (k, val) in enumerate(values):
if k == "loss": if k == "loss":
val = val if isinstance(val, (list, np.ndarray)) else [val] if isinstance(val, list):
if isinstance(val[0], np.uint16): scalar_val = val[0]
elif isinstance(val, np.ndarray):
scalar_val = val.item()
else:
scalar_val = val
if isinstance(scalar_val, np.uint16):
values[i] = ("loss", list(convert_uint16_to_float(val))) values[i] = ("loss", list(convert_uint16_to_float(val)))
if current_num: if current_num:
......
...@@ -698,7 +698,7 @@ class ClipGradByGlobalNorm(ClipGradBase): ...@@ -698,7 +698,7 @@ class ClipGradByGlobalNorm(ClipGradBase):
global_norm_var = paddle.add_n(global_norm_var) global_norm_var = paddle.add_n(global_norm_var)
global_norm_var = paddle.sqrt(global_norm_var) global_norm_var = paddle.sqrt(global_norm_var)
max_global_norm = paddle.full( max_global_norm = paddle.full(
shape=[1], dtype=global_norm_var.dtype, fill_value=self.clip_norm shape=[], dtype=global_norm_var.dtype, fill_value=self.clip_norm
) )
need_clip = False need_clip = False
......
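
A small illustration of the shape=[] convention adopted above (a sketch; the fill_value of 1.0 is arbitrary and stands in for clip_norm):

```python
import paddle

# A 0-D tensor created with shape=[] behaves like a scalar threshold.
max_global_norm = paddle.full(shape=[], dtype='float32', fill_value=1.0)
print(max_global_norm.shape)   # []
print(float(max_global_norm))  # 1.0
```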
...@@ -178,7 +178,7 @@ class FakeQuantActLSQPlus(Layer): ...@@ -178,7 +178,7 @@ class FakeQuantActLSQPlus(Layer):
s_attr = ParamAttr( s_attr = ParamAttr(
name=self._scale_name, initializer=Constant(1.0), trainable=True name=self._scale_name, initializer=Constant(1.0), trainable=True
) )
self.s = self.create_parameter(shape=[1], attr=s_attr, dtype='float32') self.s = self.create_parameter(shape=[], attr=s_attr, dtype='float32')
self.s.stop_gradient = False self.s.stop_gradient = False
if not self.symmetric: if not self.symmetric:
...@@ -189,7 +189,7 @@ class FakeQuantActLSQPlus(Layer): ...@@ -189,7 +189,7 @@ class FakeQuantActLSQPlus(Layer):
name=self._beta_name, initializer=Constant(0.0), trainable=True name=self._beta_name, initializer=Constant(0.0), trainable=True
) )
self.beta = self.create_parameter( self.beta = self.create_parameter(
shape=[1], attr=beta_attr, dtype='float32' shape=[], attr=beta_attr, dtype='float32'
) )
self.beta.stop_gradient = False self.beta.stop_gradient = False
......
...@@ -256,7 +256,10 @@ def jac(grad_fn, f, inputs): ...@@ -256,7 +256,10 @@ def jac(grad_fn, f, inputs):
_vs = vs.copy() _vs = vs.copy()
_vs[i] = _v _vs[i] = _v
_, grads = grad_fn(f, inputs, _vs) _, grads = grad_fn(f, inputs, _vs)
d_outs = paddle.concat([d_out.flatten() for d_out in grads]) if isinstance(grads, typing.Sequence):
d_outs = paddle.concat([d_out.flatten() for d_out in grads])
else:
d_outs = grads.flatten()
JJ_cols.append(d_outs) JJ_cols.append(d_outs)
# JJ is the fully unrolled jacobian # JJ is the fully unrolled jacobian
JJ = paddle.stack(JJ_cols) JJ = paddle.stack(JJ_cols)
......
...@@ -26,10 +26,7 @@ from paddle.incubate.autograd.utils import as_tensors ...@@ -26,10 +26,7 @@ from paddle.incubate.autograd.utils import as_tensors
# Finite Difference Utils # Finite Difference Utils
########################################################## ##########################################################
def _product(t): def _product(t):
if isinstance(t, int): return int(np.product(t))
return t
else:
return np.product(t)
def _get_item(t, idx): def _get_item(t, idx):
......
...@@ -407,7 +407,7 @@ class BaseModel(paddle.nn.Layer): ...@@ -407,7 +407,7 @@ class BaseModel(paddle.nn.Layer):
parent_ids = [] parent_ids = []
for step_idx in range(paddle.to_tensor(self.beam_max_step_num)): for step_idx in range(paddle.to_tensor(self.beam_max_step_num)):
if paddle.sum(1 - beam_finished).numpy()[0] == 0: if paddle.sum(1 - beam_finished) == 0:
break break
step_input = self._merge_batch_beams(step_input) step_input = self._merge_batch_beams(step_input)
new_dec_hidden, new_dec_cell = [], [] new_dec_hidden, new_dec_cell = [], []
......
...@@ -158,7 +158,7 @@ class TestConvertShapeCompare(unittest.TestCase): ...@@ -158,7 +158,7 @@ class TestConvertShapeCompare(unittest.TestCase):
fetch_list=[eq_out, not_eq_out, long_eq_out], fetch_list=[eq_out, not_eq_out, long_eq_out],
) )
np.testing.assert_array_equal( np.testing.assert_array_equal(
np.array(x_y_eq_out), np.array([[True], [False], [False]]) np.array(x_y_eq_out), np.array([True, False, False])
) )
set_a_zero = np.ones([3, 2]).astype(np.float32) set_a_zero = np.ones([3, 2]).astype(np.float32)
...@@ -168,7 +168,7 @@ class TestConvertShapeCompare(unittest.TestCase): ...@@ -168,7 +168,7 @@ class TestConvertShapeCompare(unittest.TestCase):
fetch_list=[eq_out, not_eq_out, long_eq_out], fetch_list=[eq_out, not_eq_out, long_eq_out],
) )
np.testing.assert_array_equal( np.testing.assert_array_equal(
np.array(x_y_not_eq_out), np.array([[False], [True], [True]]) np.array(x_y_not_eq_out), np.array([False, True, True])
) )
paddle.disable_static() paddle.disable_static()
......
...@@ -28,7 +28,7 @@ from paddle.static import InputSpec ...@@ -28,7 +28,7 @@ from paddle.static import InputSpec
def for_in_range(x): def for_in_range(x):
z = paddle.tensor.fill_constant([1], 'int32', 0) z = paddle.tensor.fill_constant([1], 'int32', 0)
x = fluid.dygraph.to_variable(x) x = fluid.dygraph.to_variable(x)
for i in range(x.numpy()[0]): for i in range(x.numpy().item()):
z = z + i z = z + i
return z return z
......
...@@ -573,7 +573,7 @@ class TestLACModel(unittest.TestCase): ...@@ -573,7 +573,7 @@ class TestLACModel(unittest.TestCase):
words, targets, length = batch words, targets, length = batch
start_time = time.time() start_time = time.time()
avg_cost, crf_decode = model(words, targets, length) avg_cost, crf_decode = model(words, targets, length)
loss_data.append(avg_cost.numpy()[0]) loss_data.append(float(avg_cost))
# backward and optimization # backward and optimization
avg_cost.backward() avg_cost.backward()
......
...@@ -100,7 +100,7 @@ class TestPureFP16(TestMNIST): ...@@ -100,7 +100,7 @@ class TestPureFP16(TestMNIST):
scaled.backward() scaled.backward()
scaler.minimize(optimizer, scaled) scaler.minimize(optimizer, scaled)
loss_data.append(avg_loss.numpy()[0]) loss_data.append(float(avg_loss))
# save checkpoint # save checkpoint
mnist.clear_gradients() mnist.clear_gradients()
if batch_id % 2 == 0: if batch_id % 2 == 0:
......
...@@ -176,7 +176,7 @@ def train(args, place, to_static): ...@@ -176,7 +176,7 @@ def train(args, place, to_static):
state, reward, done, _ = env.step(action) state, reward, done, _ = env.step(action)
# log loss_probs # log loss_probs
loss_data.append(loss.numpy()[0]) loss_data.append(float(loss))
policy.rewards.append(reward) policy.rewards.append(reward)
ep_reward += reward ep_reward += reward
...@@ -191,7 +191,7 @@ def train(args, place, to_static): ...@@ -191,7 +191,7 @@ def train(args, place, to_static):
if i_episode % args.log_interval == 0: if i_episode % args.log_interval == 0:
print( print(
'Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\t loss_probs: {}'.format( 'Episode {}\tLast reward: {:.2f}\tAverage reward: {:.2f}\t loss_probs: {}'.format(
i_episode, ep_reward, running_reward, loss.numpy()[0] i_episode, ep_reward, running_reward, float(loss)
) )
) )
......
...@@ -86,7 +86,7 @@ def train(to_static, build_strategy=None): ...@@ -86,7 +86,7 @@ def train(to_static, build_strategy=None):
scaler.minimize(optimizer, scaled) scaler.minimize(optimizer, scaled)
resnet.clear_gradients() resnet.clear_gradients()
loss_data.append(avg_loss.numpy()[0]) loss_data.append(float(avg_loss))
total_loss += avg_loss total_loss += avg_loss
total_acc1 += acc_top1 total_acc1 += acc_top1
total_acc5 += acc_top5 total_acc5 += acc_top5
......
...@@ -342,7 +342,7 @@ def train(args, to_static): ...@@ -342,7 +342,7 @@ def train(args, to_static):
model.train() model.train()
avg_cost, prediction, acc = model(doc, label) avg_cost, prediction, acc = model(doc, label)
loss_data.append(avg_cost.numpy()[0]) loss_data.append(float(avg_cost))
avg_cost.backward() avg_cost.backward()
sgd_optimizer.minimize(avg_cost) sgd_optimizer.minimize(avg_cost)
...@@ -358,7 +358,7 @@ def train(args, to_static): ...@@ -358,7 +358,7 @@ def train(args, to_static):
"step: %d, ave loss: %f, speed: %f steps/s" "step: %d, ave loss: %f, speed: %f steps/s"
% ( % (
batch_id, batch_id,
avg_cost.numpy()[0], float(avg_cost),
args.log_step / used_time, args.log_step / used_time,
) )
) )
......
...@@ -261,7 +261,7 @@ def train_dygraph(args, batch_generator): ...@@ -261,7 +261,7 @@ def train_dygraph(args, batch_generator):
transformer.clear_gradients() transformer.clear_gradients()
if step_idx % args.print_step == 0: if step_idx % args.print_step == 0:
total_avg_cost = avg_cost.numpy() * trainer_count total_avg_cost = avg_cost.numpy() * trainer_count
avg_loss.append(total_avg_cost[0]) avg_loss.append(float(total_avg_cost))
if step_idx == 0: if step_idx == 0:
logging.info( logging.info(
"step_idx: %d, epoch: %d, batch: %d, avg loss: %f, " "step_idx: %d, epoch: %d, batch: %d, avg loss: %f, "
......