未验证 提交 e4bf1a8e 编写于 作者: zhouweiwei2014's avatar zhouweiwei2014 提交者: GitHub

[Zero-Dim] Support p_norm/reduce_sum_p output 0D (#53421)

上级 e522ceb7
...@@ -79,9 +79,6 @@ class ReduceSumPrimOpShapeInference : public framework::InferShapeBase { ...@@ -79,9 +79,6 @@ class ReduceSumPrimOpShapeInference : public framework::InferShapeBase {
x_shape.erase(remove(x_shape.begin(), x_shape.end(), kDelFlag), x_shape.erase(remove(x_shape.begin(), x_shape.end(), kDelFlag),
x_shape.end()); x_shape.end());
} }
if (!keepdim && x_shape.size() == 0) {
x_shape.push_back(1);
}
PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_shape); PADDLE_GET(framework::VarDesc *, y_var_ptr)->SetShape(x_shape);
} }
......
...@@ -2750,31 +2750,33 @@ void PNormInferMeta(const MetaTensor& x, ...@@ -2750,31 +2750,33 @@ void PNormInferMeta(const MetaTensor& x,
x_rank, x_rank,
x_dim)); x_dim));
std::vector<int> reduce_dims; std::vector<int> out_dim_vector;
if (asvector) { if (asvector) {
reduce_dims.emplace_back(1);
if (keepdim) { if (keepdim) {
for (int i = 1; i < x_dim.size(); ++i) { for (int i = 0; i < x_rank; ++i) {
reduce_dims.emplace_back(1); out_dim_vector.emplace_back(1);
} }
x_dim = phi::make_ddim(reduce_dims); } else {
out_dim_vector = {};
} }
} else { } else {
if (axis < 0) axis = x_dim.size() + axis; if (axis < 0) axis = axis + x_rank;
if (keepdim) {
for (int i = 0; i < x_dim.size(); ++i) { for (int i = 0; i < x_dim.size(); ++i) {
if (i != axis) reduce_dims.emplace_back(x_dim[i]); if (i != axis) {
} out_dim_vector.emplace_back(x_dim[i]);
if (reduce_dims.size() == 0) { } else {
reduce_dims.emplace_back(1); out_dim_vector.emplace_back(1);
} }
x_dim[axis] = 1;
} }
if (keepdim) {
out->set_dims(x_dim);
} else { } else {
out->set_dims(phi::make_ddim(reduce_dims)); for (int i = 0; i < x_dim.size(); ++i) {
if (i != axis) out_dim_vector.emplace_back(x_dim[i]);
}
} }
}
out->set_dims(phi::make_ddim(out_dim_vector));
out->set_dtype(x.dtype()); out->set_dtype(x.dtype());
} }
......
...@@ -58,7 +58,7 @@ class DistributedReduceSumPrimtiveImpl0(DistributedOperatorImpl): ...@@ -58,7 +58,7 @@ class DistributedReduceSumPrimtiveImpl0(DistributedOperatorImpl):
output_name = outputs[0] output_name = outputs[0]
output_var = dist_op.serial_op.block._var_recursive(output_name) output_var = dist_op.serial_op.block._var_recursive(output_name)
if output_var.shape != (1,): if output_var.shape != ():
return False return False
return True return True
......
...@@ -22,10 +22,6 @@ from paddle import fluid ...@@ -22,10 +22,6 @@ from paddle import fluid
def np_pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False): def np_pairwise_distance(x, y, p=2.0, epsilon=1e-6, keepdim=False):
distance = np.linalg.norm(x - y + epsilon, ord=p, axis=-1, keepdims=keepdim) distance = np.linalg.norm(x - y + epsilon, ord=p, axis=-1, keepdims=keepdim)
# Paddle currently has not supported for 0-d Tensors, so even if keep_dim is False,
# and neither x nor y is batched, a Tensor of shape (1, ) is returned
if distance.ndim == 0:
distance = np.expand_dims(distance, axis=0)
return distance return distance
......
...@@ -121,9 +121,19 @@ class TestDistPNormDP(TestDistPNorm): ...@@ -121,9 +121,19 @@ class TestDistPNormDP(TestDistPNorm):
op_dist_attr = self.dist_context.get_op_dist_attr_for_program(op) op_dist_attr = self.dist_context.get_op_dist_attr_for_program(op)
if op.type == "p_norm": if op.type == "p_norm":
assert op_dist_attr.impl_type == "p_norm" assert op_dist_attr.impl_type == "p_norm"
if op.type in ["p_norm", "p_norm_grad"]:
for input_attr in op_dist_attr.inputs_dist_attrs.values(): for input_attr in op_dist_attr.inputs_dist_attrs.values():
assert set(input_attr.dims_mapping) == {-1} assert set(input_attr.dims_mapping) == {-1}
for output_attr in op_dist_attr.outputs_dist_attrs.values():
if len(output_attr.dims_mapping) == 0:
assert output_attr.dims_mapping == []
else:
assert set(output_attr.dims_mapping) == {-1}
if op.type == "p_norm_grad":
for input_attr in op_dist_attr.inputs_dist_attrs.values():
if len(input_attr.dims_mapping) == 0:
assert input_attr.dims_mapping == []
else:
assert set(input_attr.dims_mapping) == {-1}
for output_attr in op_dist_attr.outputs_dist_attrs.values(): for output_attr in op_dist_attr.outputs_dist_attrs.values():
assert set(output_attr.dims_mapping) == {-1} assert set(output_attr.dims_mapping) == {-1}
if op.type == 'c_allgather': if op.type == 'c_allgather':
......
...@@ -55,7 +55,7 @@ class TestPrimDistOp(unittest.TestCase): ...@@ -55,7 +55,7 @@ class TestPrimDistOp(unittest.TestCase):
self.tmp1 = paddle.static.data(name='tmp1', shape=[20], dtype='float') self.tmp1 = paddle.static.data(name='tmp1', shape=[20], dtype='float')
self.tmp2 = paddle.static.data(name='tmp2', shape=[20], dtype='float') self.tmp2 = paddle.static.data(name='tmp2', shape=[20], dtype='float')
self.batch_reduced = paddle.static.data( self.batch_reduced = paddle.static.data(
name='batch_reduced', shape=[1], dtype='float' name='batch_reduced', shape=[], dtype='float'
) )
self.attrs = {} self.attrs = {}
...@@ -108,7 +108,6 @@ class TestPrimDistOp(unittest.TestCase): ...@@ -108,7 +108,6 @@ class TestPrimDistOp(unittest.TestCase):
self.main_program, self.startup_program, [(self.w, self.w_grad)] self.main_program, self.startup_program, [(self.w, self.w_grad)]
) )
ops = dist_main_prog.global_block().ops ops = dist_main_prog.global_block().ops
self.assertTrue(ops[1].type == "c_allreduce_sum") self.assertTrue(ops[1].type == "c_allreduce_sum")
self.assertTrue(ops[3].type == "c_allreduce_sum") self.assertTrue(ops[3].type == "c_allreduce_sum")
......
...@@ -110,7 +110,7 @@ paddle.enable_static() ...@@ -110,7 +110,7 @@ paddle.enable_static()
primops.reduce_sum, primops.reduce_sum,
randn(2, 3), randn(2, 3),
{'axis': (0, 1)}, {'axis': (0, 1)},
(1,), (),
'float64', 'float64',
), ),
( (
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册