From fc6ec3b9f6307497088a01b23e526ed833109fbe Mon Sep 17 00:00:00 2001 From: liym27 <33742067+liym27@users.noreply.github.com> Date: Sun, 13 Oct 2019 10:22:14 +0800 Subject: [PATCH] fill_constant support Tensor; (#20521) 2. fix bug in backward.py: using fill_constant instead of fill_constant_batch_size_like 3. fix bug in ExpandGradOp. test=develop --- paddle/fluid/operators/expand_op.cc | 12 +- paddle/fluid/operators/fill_constant_op.cc | 42 +++++- paddle/fluid/operators/fill_constant_op.h | 55 +++++++- python/paddle/fluid/backward.py | 12 +- python/paddle/fluid/layers/tensor.py | 61 +++++++-- .../fluid/tests/unittests/test_expand_op.py | 2 + .../tests/unittests/test_fill_constant_op.py | 128 +++++++++++++++++- 7 files changed, 283 insertions(+), 29 deletions(-) diff --git a/paddle/fluid/operators/expand_op.cc b/paddle/fluid/operators/expand_op.cc index 09c730db395..677130f2f92 100644 --- a/paddle/fluid/operators/expand_op.cc +++ b/paddle/fluid/operators/expand_op.cc @@ -31,7 +31,6 @@ class ExpandOp : public framework::OperatorWithKernel { PADDLE_ENFORCE_EQ(ctx->HasInput("X"), true, "Input(X) should not be null."); PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, "Output(Out) should not be null."); - auto x_dims = ctx->GetInputDim("X"); auto expand_times = ctx->Attrs().Get>("expand_times"); @@ -162,10 +161,13 @@ class ExpandGradOp : public framework::OperatorWithKernel { if (expand_times[i] == -1) { continue; } else { - PADDLE_ENFORCE_EQ(x_dims[i] * expand_times[i], out_dims[i], - "Each dimension size of Input(Out@GRAD) should be " - "equal to multiplication of crroresponding dimension " - "size of Input(X) and Attr(expand_times) value."); + if (ctx->IsRuntime()) { + PADDLE_ENFORCE_EQ( + x_dims[i] * expand_times[i], out_dims[i], + "Each dimension size of Input(Out@GRAD) should be " + "equal to multiplication of crroresponding dimension " + "size of Input(X) and Attr(expand_times) value."); + } } } auto x_grad_name = framework::GradVarName("X"); diff --git a/paddle/fluid/operators/fill_constant_op.cc b/paddle/fluid/operators/fill_constant_op.cc index b11a89c7385..0016aab143e 100644 --- a/paddle/fluid/operators/fill_constant_op.cc +++ b/paddle/fluid/operators/fill_constant_op.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ #include "paddle/fluid/operators/fill_constant_op.h" - +#include namespace paddle { namespace operators { @@ -22,9 +22,22 @@ class FillConstantOp : public framework::OperatorWithKernel { using framework::OperatorWithKernel::OperatorWithKernel; void InferShape(framework::InferShapeContext* ctx) const override { - PADDLE_ENFORCE(ctx->HasOutput("Out"), - "Output(Out) of FillConstantOp should not be null."); + PADDLE_ENFORCE_EQ(ctx->HasOutput("Out"), true, + "Output(Out) of FillConstantOp should not be null."); + auto& shape = ctx->Attrs().Get>("shape"); + + if (shape.empty() && ctx->HasInput("ShapeTensor")) { + auto shape_dims = ctx->GetInputDim("ShapeTensor"); + int num_ele = 1; + for (int i = 0; i < shape_dims.size(); ++i) { + num_ele *= shape_dims[i]; + } + auto vec_dims = std::vector(num_ele, -1); + ctx->SetOutputDim("Out", framework::make_ddim(vec_dims)); + + return; + } ctx->SetOutputDim("Out", framework::make_ddim(shape)); } @@ -35,6 +48,16 @@ class FillConstantOp : public framework::OperatorWithKernel { framework::proto::VarType::Type(ctx.Attr("dtype")), ctx.GetPlace()); } + + framework::OpKernelType GetKernelTypeForVar( + const std::string& var_name, const Tensor& tensor, + const framework::OpKernelType& expected_kernel_type) const override { + if (var_name == "ShapeTensor" || var_name == "ShapeTensorList") { + return expected_kernel_type; + } + return framework::OpKernelType(expected_kernel_type.data_type_, + tensor.place(), tensor.layout()); + } }; class FillConstantOpVarTypeInference : public framework::VarTypeInference { @@ -55,7 +78,18 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker { "Output data type") .SetDefault(framework::proto::VarType::FP32); AddAttr>("shape", - "(vector) The shape of the output"); + "(vector) The shape of the output") + .SetDefault({}); + AddInput("ShapeTensor", + "(Tensor), optional). The shape of the output." + "It has a higher priority than Attr(shape).") + .AsDispensable(); + AddInput("ShapeTensorList", + "(vector>, optional). The shape of the output. " + "It has a higher priority than Attr(shape)." + "The shape of the element in vector must be [1].") + .AsDuplicable() + .AsDispensable(); AddAttr("value", "(float, default 0) The value to be filled") .SetDefault(0.0f); AddAttr("force_cpu", diff --git a/paddle/fluid/operators/fill_constant_op.h b/paddle/fluid/operators/fill_constant_op.h index 417c5b4da61..1359d25df70 100644 --- a/paddle/fluid/operators/fill_constant_op.h +++ b/paddle/fluid/operators/fill_constant_op.h @@ -22,6 +22,53 @@ limitations under the License. */ namespace paddle { namespace operators { + +using Tensor = framework::Tensor; + +inline framework::DDim GetShape(const framework::ExecutionContext &ctx) { + // 1. shape is a Tensor + if (ctx.HasInput("ShapeTensor")) { + auto *shape_tensor = ctx.Input("ShapeTensor"); + auto *shape_data = shape_tensor->data(); + framework::Tensor cpu_shape_tensor; + if (platform::is_gpu_place(shape_tensor->place())) { + TensorCopySync(*shape_tensor, platform::CPUPlace(), &cpu_shape_tensor); + shape_data = cpu_shape_tensor.data(); + } + auto vec_shape = + std::vector(shape_data, shape_data + shape_tensor->numel()); + return framework::make_ddim(vec_shape); + } + + // 2. 
shape is a list/tuple containing Tensor + auto shape_tensor_list = ctx.MultiInput("ShapeTensorList"); + if (shape_tensor_list.size() > 0) { + std::vector vec_shape; + for (size_t i = 0; i < shape_tensor_list.size(); ++i) { + auto tensor = shape_tensor_list[i]; + PADDLE_ENFORCE_EQ( + tensor->dims(), framework::make_ddim({1}), + "ShapeError: If the element type of 'shape' in FillConstantOp is " + "Tensor, " + "the element's shape must be [1]. But received the element's shape " + "is [%s]", + tensor->dims()); + if (platform::is_gpu_place(tensor->place())) { + framework::Tensor temp; + TensorCopySync(*tensor, platform::CPUPlace(), &temp); + vec_shape.push_back(*temp.data()); + } else { + vec_shape.push_back(*tensor->data()); + } + } + return framework::make_ddim(vec_shape); + } + + // 3. shape is a list/tuple without containing Tensor + auto vec_shape = ctx.Attr>("shape"); + return framework::make_ddim(vec_shape); +} + template class FillConstantKernel : public framework::OpKernel { public: @@ -35,14 +82,14 @@ class FillConstantKernel : public framework::OpKernel { framework::Variable *out_var = ctx.OutputVar("Out"); + auto shape = GetShape(ctx); + if (out_var->IsType()) { tensor = out_var->GetMutable(); - tensor->Resize( - framework::make_ddim(ctx.Attr>("shape"))); + tensor->Resize(shape); } else if (out_var->IsType()) { tensor = out_var->GetMutable()->mutable_value(); - tensor->Resize( - framework::make_ddim(ctx.Attr>("shape"))); + tensor->Resize(shape); } else { PADDLE_THROW( "fill constant op's output only" diff --git a/python/paddle/fluid/backward.py b/python/paddle/fluid/backward.py index 1894ac41527..1306fbc574e 100644 --- a/python/paddle/fluid/backward.py +++ b/python/paddle/fluid/backward.py @@ -23,7 +23,7 @@ import logging from .. import compat as cpt from . import unique_name from . import log_helper - +import paddle.fluid __all__ = [ 'append_backward', 'gradients', @@ -1247,15 +1247,15 @@ def calc_gradient(targets, inputs, target_gradients=None, no_grad_set=None): target = targets[i] if grad is None: grad_name = _append_grad_suffix_(target.name) - op_desc = _create_op_desc_("fill_constant_batch_size_like", - {"Input": [target.name]}, + target_shape = paddle.fluid.layers.shape(target) + op_desc = _create_op_desc_("fill_constant", + {"ShapeTensor": [target_shape.name]}, {"Out": [grad_name]}, { - "shape": target.shape, + "shape": [], "value": 1.0, "dtype": target.dtype, - 'input_dim_idx': 0, - 'output_dim_idx': 0 }) + block.desc.append_op().copy_from(op_desc) input_grad_names_set.add(grad_name) else: diff --git a/python/paddle/fluid/layers/tensor.py b/python/paddle/fluid/layers/tensor.py index 2928cd69ba3..504e7a3e41d 100644 --- a/python/paddle/fluid/layers/tensor.py +++ b/python/paddle/fluid/layers/tensor.py @@ -532,7 +532,6 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): data2 = fluid.layers.fill_constant(shape=[2,1], value=5, dtype='int64', out=data1) #data1=[[5], [5]] data2=[[5], [5]] """ - helper = LayerHelper("fill_constant", **locals()) if convert_dtype(dtype) not in [ 'bool', 'float16', 'float32', 'float64', 'int32', 'int64' @@ -541,6 +540,56 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): "The create data type in fill_constant must be one of 'bool', float16, float32," "float64, int32 or int64, but received %s." % convert_dtype( (dtype))) + + if not isinstance(shape, (list, tuple, Variable)): + raise TypeError( + "The type of 'shape' in fill_constant must be Variable, list or tuple, but " + "received %s." 
% (type(shape))) + + inputs = {} + attrs = { + 'value': float(value), + 'force_cpu': force_cpu or force_init_on_cpu() + } + + def _contain_var(one_list): + for ele in one_list: + if isinstance(ele, Variable): + return True + return False + + def _get_attr_shape(list_shape): + attr_shape = [] + for idx, dim in enumerate(list_shape): + if isinstance(dim, Variable): + attr_shape.append(-1) + else: + attr_shape.append(dim) + return attr_shape + + def _get_shape_tensor(list_shape): + new_shape_tensor = [] + for dim in list_shape: + if isinstance(dim, Variable): + dim.stop_gradient = True + new_shape_tensor.append(dim) + else: + temp_out = helper.create_variable_for_type_inference('int32') + fill_constant([1], 'int32', dim, force_cpu=True, out=temp_out) + new_shape_tensor.append(temp_out) + return new_shape_tensor + + if isinstance(shape, Variable): + shape.stop_gradient = True + inputs["ShapeTensor"] = shape + elif isinstance(shape, (list, tuple)): + assert len(shape) > 0, ( + "The size of 'shape' in fill_constant can't be zero, " + "but received %s." % len(shape)) + attrs["shape"] = _get_attr_shape(shape) + if _contain_var(shape): + inputs['ShapeTensorList'] = _get_shape_tensor(shape) + if out is None: out = helper.create_variable_for_type_inference(dtype=dtype) else: @@ -549,16 +598,12 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None): "The create data type in op must be same with out type" "but received %s and out dtype %s." % (convert_dtype( (dtype), convert_dtype(out.dtype)))) + attrs['dtype'] = out.dtype helper.append_op( type='fill_constant', - inputs={}, + inputs=inputs, outputs={'Out': [out]}, - attrs={ - 'shape': shape, - 'dtype': out.dtype, - 'value': float(value), - 'force_cpu': force_cpu or force_init_on_cpu() - }, + attrs=attrs, stop_gradient=True) out.stop_gradient = True return out diff --git a/python/paddle/fluid/tests/unittests/test_expand_op.py b/python/paddle/fluid/tests/unittests/test_expand_op.py index b4efda63e10..910ab684f47 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_op.py @@ -222,6 +222,8 @@ class TestExpandAPI(OpTest): out_2 = fluid.layers.expand(x, expand_times=[positive_2, 3]) out_3 = fluid.layers.expand(x, expand_times=expand_times) + g0 = fluid.backward.calc_gradient(out_2, x) + exe = fluid.Executor(place=fluid.CPUPlace()) res_1, res_2, res_3 = exe.run(fluid.default_main_program(), feed={ diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 9401007643e..3d2340fddce 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -24,6 +24,7 @@ import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard +# Situation 1: Attr(shape) is a list(without tensor) class TestFillConstantOp1(OpTest): def setUp(self): '''Test fill_constant op with specified value @@ -106,10 +107,121 @@ class TestFillConstantOpWithSelectedRows(OpTest): self.check_with_place(place) +# Situation 2: Attr(shape) is a list(with tensor) +class TestFillConstantOp1_ShapeTensorList(OpTest): + def setUp(self): + '''Test fill_constant op with specified value + ''' + self.op_type = "fill_constant" + self.init_data() + shape_tensor_list = [] + for index, ele in enumerate(self.shape): + shape_tensor_list.append(("x" + str(index), np.ones( + (1)).astype('int32') * ele)) + + self.inputs = {"ShapeTensorList": 
shape_tensor_list} + self.attrs = {'shape': self.infer_shape, 'value': self.value} + self.outputs = {'Out': np.full(self.shape, self.value)} + + def init_data(self): + self.shape = [123, 92] + self.infer_shape = [-1, 92] + self.value = 3.8 + + def test_check_output(self): + self.check_output() + + +class TestFillConstantOp2_ShapeTensorList(OpTest): + def setUp(self): + '''Test fill_constant op with default value + ''' + self.op_type = "fill_constant" + self.init_data() + shape_tensor_list = [] + for index, ele in enumerate(self.shape): + shape_tensor_list.append(("x" + str(index), np.ones( + (1)).astype('int32') * ele)) + + self.inputs = {"ShapeTensorList": shape_tensor_list} + self.attrs = {'shape': self.infer_shape} + self.outputs = {'Out': np.full(self.shape, 0.0)} + + def init_data(self): + self.shape = [123, 92] + self.infer_shape = [-1, -1] + + def test_check_output(self): + self.check_output() + + +class TestFillConstantOp3_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): + self.shape = [123, 92] + self.infer_shape = [123, -1] + self.value = 10000000000 + + +class TestFillConstantOp4_ShapeTensorList(TestFillConstantOp1_ShapeTensorList): + def init_data(self): + self.shape = [123, 92] + self.infer_shape = [123, -1] + self.value = 3 + + +# Situation 3: shape is a tensor +class TestFillConstantOp1_ShapeTensor(OpTest): + def setUp(self): + '''Test fill_constant op with specified value + ''' + self.op_type = "fill_constant" + self.init_data() + + self.inputs = {"ShapeTensor": np.array(self.shape).astype("int32")} + self.attrs = {'value': self.value} + self.outputs = {'Out': np.full(self.shape, self.value)} + + def init_data(self): + self.shape = [123, 92] + self.value = 3.8 + + def test_check_output(self): + self.check_output() + + +# # Test python API +class TestFillConstantAPI(OpTest): + def test_api(self): + positive_2 = fluid.layers.fill_constant([1], "int32", 2) + shape_tensor = fluid.layers.data( + name="shape_tensor", + shape=[2], + append_batch_size=False, + dtype="int32") + + out_1 = fluid.layers.fill_constant( + shape=[1, 2], dtype="float32", value=1.1) + out_2 = fluid.layers.fill_constant( + shape=[1, positive_2], dtype="float32", value=1.1) + + out_3 = fluid.layers.fill_constant( + shape=shape_tensor, dtype="float32", value=1.1) + + exe = fluid.Executor(place=fluid.CPUPlace()) + res_1, res_2, res_3 = exe.run( + fluid.default_main_program(), + feed={"shape_tensor": np.array([1, 2]).astype("int32")}, + fetch_list=[out_1, out_2, out_3]) + + assert np.array_equal(res_1, np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res_2, np.full([1, 2], 1.1, dtype="float32")) + assert np.array_equal(res_3, np.full([1, 2], 1.1, dtype="float32")) + + class TestFillConstantOpError(OpTest): def test_errors(self): with program_guard(Program(), Program()): - #for ci coverage + #for ci coverage x1 = fluid.layers.data(name='x1', shape=[1], dtype="int16") self.assertRaises( ValueError, @@ -124,9 +236,10 @@ class TestFillConstantOpError(OpTest): value=5, dtype='int16', out=x1) - # The input dtype of fill_constant must be one of bool, float16, + # The input dtype of fill_constant must be one of bool, float16, #float32, float64, int32 or int64 x2 = fluid.layers.data(name='x2', shape=[1], dtype="int32") + self.assertRaises( TypeError, fluid.layers.fill_constant, @@ -141,6 +254,17 @@ class TestFillConstantOpError(OpTest): dtype='float64', out=x2) + # test Error of Shape + def test_shape_type(): + fluid.layers.fill_constant(shape=1, dtype="float32", value=1) + + 
self.assertRaises(TypeError, test_shape_type) + + def test_shape_size(): + fluid.layers.fill_constant(shape=[], dtype="float32", value=1) + + self.assertRaises(AssertionError, test_shape_size) + if __name__ == "__main__": unittest.main() -- GitLab
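
Editor's note on usage: read together, the Python-side changes mean fluid.layers.fill_constant now accepts three forms of shape: a plain list/tuple of ints (carried by the shape attribute), a list/tuple mixing ints with 1-element int32 Variables (wired to the new ShapeTensorList input, with -1 placeholders kept in the attribute so compile-time shape inference still works), and a single 1-D int32 Tensor (wired to the new ShapeTensor input). The sketch below is a minimal usage example modeled on TestFillConstantAPI from this patch; it assumes a Paddle 1.x fluid environment, and the variable names are illustrative rather than part of the change.

import numpy as np
import paddle.fluid as fluid

# Form 1: shape is a plain Python list -> carried by the 'shape' attribute.
out_attr = fluid.layers.fill_constant(shape=[2, 3], dtype="float32", value=1.5)

# Form 2: shape mixes ints and 1-element int32 Variables -> ShapeTensorList input.
dim0 = fluid.layers.fill_constant(shape=[1], dtype="int32", value=2)
out_list = fluid.layers.fill_constant(shape=[dim0, 3], dtype="float32", value=1.5)

# Form 3: shape is a 1-D int32 Tensor fed at run time -> ShapeTensor input.
shape_tensor = fluid.layers.data(
    name="shape_tensor", shape=[2], append_batch_size=False, dtype="int32")
out_tensor = fluid.layers.fill_constant(
    shape=shape_tensor, dtype="float32", value=1.5)

exe = fluid.Executor(fluid.CPUPlace())
res_attr, res_list, res_tensor = exe.run(
    fluid.default_main_program(),
    feed={"shape_tensor": np.array([2, 3]).astype("int32")},
    fetch_list=[out_attr, out_list, out_tensor])

assert np.array_equal(res_attr, np.full([2, 3], 1.5, dtype="float32"))
assert np.array_equal(res_list, np.full([2, 3], 1.5, dtype="float32"))
assert np.array_equal(res_tensor, np.full([2, 3], 1.5, dtype="float32"))

The backward.py change builds on the same mechanism: calc_gradient now initializes the target gradient with fill_constant driven by a ShapeTensor obtained from shape(target), so targets whose shape is only known at run time (such as the expand output exercised by the new calc_gradient call in test_expand_op.py) no longer rely on fill_constant_batch_size_like.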