Unverified commit 29c4fae1, authored by wangchaochaohu, committed by GitHub

Tensor value support (#23491)

* add support for passing value as a Tensor to the fill_constant Op
Parent: e8efaee9
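In user-facing terms, this change lets `fill_constant` take its fill value from a 1-element tensor instead of a Python scalar. A minimal sketch of the new usage, adapted from the docstring example added in this diff (static-graph `fluid` API):

```python
import paddle.fluid as fluid

# value passed as a 1-element Variable rather than a Python float
val = fluid.layers.fill_constant(shape=[1], dtype='float32', value=2.0)       # val = [2.0]
data = fluid.layers.fill_constant(shape=[2, 1], dtype='float32', value=val)   # data = [[2.0], [2.0]]
```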
@@ -48,16 +48,6 @@ class FillConstantOp : public framework::OperatorWithKernel {
         framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
         ctx.GetPlace());
   }
-  framework::OpKernelType GetKernelTypeForVar(
-      const std::string& var_name, const Tensor& tensor,
-      const framework::OpKernelType& expected_kernel_type) const override {
-    if (var_name == "ShapeTensor" || var_name == "ShapeTensorList") {
-      return expected_kernel_type;
-    }
-    return framework::OpKernelType(expected_kernel_type.data_type_,
-                                   tensor.place(), tensor.layout());
-  }
 };
 
 class FillConstantOpVarTypeInference : public framework::VarTypeInference {
@@ -80,6 +70,11 @@ class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
     AddAttr<std::vector<int64_t>>("shape",
                                   "(vector<int64_t>) The shape of the output")
         .SetDefault({});
+    AddInput("ValueTensor",
+             "(Tensor, optional) If provided, fill_constant Op will use this "
+             "as the value to set the output Tensor; it has a higher priority "
+             "than attr(str_value). The shape of this tensor MUST BE [1].")
+        .AsDispensable();
     AddInput("ShapeTensor",
              "(Tensor<int>, optional). The shape of the output. "
              "It has a higher priority than Attr(shape).")
......
@@ -99,6 +99,22 @@ class FillConstantKernel : public framework::OpKernel<T> {
         value = static_cast<T>(tmp_value);
       }
     }
+    if (ctx.HasInput("ValueTensor")) {
+      auto *value_tensor = ctx.Input<framework::Tensor>("ValueTensor");
+      PADDLE_ENFORCE_EQ(
+          value_tensor->numel(), 1,
+          platform::errors::InvalidArgument(
+              "When a Tensor is used as the value in fill_constant, "
+              "the value input (ValueTensor) must have size 1, but got %d",
+              value_tensor->numel()));
+      const T *tensor_data = value_tensor->data<T>();
+      framework::Tensor cpu_tensor;
+      if (platform::is_gpu_place(value_tensor->place())) {
+        TensorCopySync(*value_tensor, platform::CPUPlace(), &cpu_tensor);
+        tensor_data = cpu_tensor.data<T>();
+      }
+      value = tensor_data[0];
+    }
     auto shape = GetShape(ctx);
     if (out_var->IsType<framework::LoDTensor>()) {
......
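The kernel hunk above resolves the fill value in a fixed priority order: a `ValueTensor` input (copied to CPU first on GPU places) overrides the `str_value` attribute, which in turn overrides the plain `value` attribute. A minimal Python sketch of that selection logic — names here are ours, not Paddle's, and this is not the actual C++ code:

```python
import numpy as np

def resolve_fill_value(attr_value, str_value=None, value_tensor=None):
    # Sketch of the kernel's resolution order:
    # value attr < str_value attr < ValueTensor input.
    value = attr_value
    if str_value:
        value = float(str_value)            # kernel parses str_value when set
    if value_tensor is not None:
        if value_tensor.size != 1:          # mirrors the PADDLE_ENFORCE_EQ check
            raise ValueError("ValueTensor size must be 1, but got %d"
                             % value_tensor.size)
        value = value_tensor.reshape(-1)[0]  # on GPU, copied to CPU first
    return value

print(resolve_fill_value(1.0, "2.0", np.array([3.5])))  # 3.5: ValueTensor wins
```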
@@ -42,19 +42,6 @@ void AdamOp::InferShape(framework::InferShapeContext *ctx) const {
                     platform::errors::NotFound(
                         "Input(Beta2Pow) of AdamOp should not be null."));
-  if (ctx->IsRuntime() && ctx->HasInput("Beta1Tensor")) {
-    auto beta1 = ctx->Inputs("Beta1Tensor");
-    PADDLE_ENFORCE_EQ(
-        beta1.size(), 1,
-        platform::errors::InvalidArgument("Input(Beta1Tensor) size must be 1"));
-  }
-  if (ctx->IsRuntime() && ctx->HasInput("Beta2Tensor")) {
-    auto beta2 = ctx->Inputs("Beta2Tensor");
-    PADDLE_ENFORCE_EQ(
-        beta2.size(), 1,
-        platform::errors::InvalidArgument("Input(Beta2Tensor) size must be 1"));
-  }
   PADDLE_ENFORCE_EQ(ctx->HasOutput("ParamOut"), true,
                     platform::errors::NotFound(
                         "Output(ParamOut) of AdamOp should not be null."));
......
@@ -151,11 +151,19 @@ class AdamOpCUDAKernel : public framework::OpKernel<T> {
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     if (ctx.HasInput("Beta1Tensor")) {
       auto* beta1_tensor = ctx.Input<framework::Tensor>("Beta1Tensor");
+      PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(Beta1Tensor) size must be 1, but got %d",
+                            beta1_tensor->numel()));
       beta1 = static_cast<T>(GetAttrFromTensor(beta1_tensor));
     }
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     if (ctx.HasInput("Beta2Tensor")) {
       auto* beta2_tensor = ctx.Input<framework::Tensor>("Beta2Tensor");
+      PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(Beta2Tensor) size must be 1, but got %d",
+                            beta2_tensor->numel()));
       beta2 = static_cast<T>(GetAttrFromTensor(beta2_tensor));
     }
     VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel()
......
@@ -406,11 +406,19 @@ class AdamOpKernel : public framework::OpKernel<T> {
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     if (ctx.HasInput("Beta1Tensor")) {
       auto* beta1_tensor = ctx.Input<framework::Tensor>("Beta1Tensor");
+      PADDLE_ENFORCE_EQ(beta1_tensor->numel(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(Beta1Tensor) size must be 1, but got %d",
+                            beta1_tensor->numel()));
       beta1 = static_cast<T>(GetAttrFromTensor(beta1_tensor));
     }
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     if (ctx.HasInput("Beta2Tensor")) {
       auto* beta2_tensor = ctx.Input<framework::Tensor>("Beta2Tensor");
+      PADDLE_ENFORCE_EQ(beta2_tensor->numel(), 1,
+                        platform::errors::InvalidArgument(
+                            "Input(Beta2Tensor) size must be 1, but got %d",
+                            beta2_tensor->numel()));
       beta2 = static_cast<T>(GetAttrFromTensor(beta2_tensor));
     }
     VLOG(3) << "beta1_pow.numel() : " << beta1_pow->numel()
......
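Both Adam kernel hunks above move the size check from `InferShape` into the kernels, validating that each optional beta tensor holds exactly one element before reading it. At the Python level this corresponds to supplying a Variable for the betas — a hedged sketch, assuming the `fluid.optimizer.Adam` of this era accepts a Variable for `beta1`:

```python
import paddle.fluid as fluid

# beta1 built as a 1-element tensor; the op reads it via its Beta1Tensor input
beta1 = fluid.layers.fill_constant(shape=[1], dtype='float32', value=0.85)
adam = fluid.optimizer.Adam(learning_rate=0.01, beta1=beta1)  # assumption: Variable beta1 supported
```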
@@ -550,8 +550,9 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
             If ``shape`` is a Variable, it should be a 1-D Tensor.
         dtype(np.dtype|core.VarDesc.VarType|str): Data type of the output tensor, which can
             be float16, float32, float64, int32 or int64.
-        value(float): The constant value used to initialize the Tensor to be created.
-        force_cpu(True): data should be on CPU if it's true, default value is False.
+        value(float16|float32|float64|int32|int64|Variable): The constant value used to initialize
+            the Tensor to be created. If value is a Variable, it should be a 1-D Tensor.
+        force_cpu(bool): If True, the output Tensor is kept on CPU memory. Default: False.
         out(Variable, optional): Optional output which can be any created
             Variable that meets the requirements to store the result of operation.
             If out is None, a new Variable will be created to store the result.
@@ -579,13 +580,21 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
           # attr shape is a Variable Tensor.
           shape = fluid.layers.fill_constant([1,2], "int32", 2) # shape=[2,2]
           data4 = fluid.layers.fill_constant(shape=shape, dtype='bool', value=True) # data4=[[True,True],[True,True]]
+
+          # attr value is a Variable Tensor.
+          val = fluid.layers.fill_constant([1], "float32", 2.0) # val=[2.0]
+          data5 = fluid.layers.fill_constant(shape=[2,1], value=val, dtype='float32') # data5=[[2.0],[2.0]]
     """
-    attrs = {'value': float(value), 'force_cpu': force_cpu}
-
-    if convert_dtype(dtype) in ['int64', 'int32']:
-        attrs['str_value'] = str(int(value))
-    else:
-        attrs['str_value'] = str(float(value))
+    inputs = {}
+    attrs = {'force_cpu': force_cpu}
+    if isinstance(value, Variable):
+        inputs['ValueTensor'] = value
+    else:
+        attrs['value'] = float(value)
+        if convert_dtype(dtype) in ['int64', 'int32']:
+            attrs['str_value'] = str(int(value))
+        else:
+            attrs['str_value'] = str(float(value))
 
     if in_dygraph_mode():
         if isinstance(shape, (list, tuple)):
@@ -596,6 +605,13 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
                 shape = list(shape.numpy().astype(int))
         if out is None:
             out = _varbase_creator(dtype=dtype)
+
+        if isinstance(value, Variable):
+            if convert_dtype(dtype) in ['int64', 'int32']:
+                attrs['str_value'] = str(int(value.numpy()))
+            else:
+                attrs['str_value'] = str(float(value.numpy()))
+
         core.ops.fill_constant(out, 'value',
                                float(value), 'force_cpu', force_cpu, 'dtype',
                                out.dtype, 'str_value', attrs['str_value'],
@@ -608,55 +624,12 @@ def fill_constant(shape, dtype, value, force_cpu=False, out=None):
         ['bool', 'float16', 'float32', 'float64', 'int32', 'int64'],
         'fill_constant')
     check_type(shape, 'shape', (Variable, list, tuple), 'fill_constant')
-    inputs = {}
-    attrs = {'value': float(value), 'force_cpu': force_cpu}
-
-    if convert_dtype(dtype) in ['int64', 'int32']:
-        attrs['str_value'] = str(int(value))
-    else:
-        attrs['str_value'] = str(float(value))
-
-    def _get_attr_shape(list_shape):
-        attr_shape = []
-        for idx, dim in enumerate(list_shape):
-            if isinstance(dim, Variable):
-                attr_shape.append(-1)
-            else:
-                attr_shape.append(dim)
-        return attr_shape
-
-    def _get_shape_tensor(list_shape):
-        new_shape_tensor = []
-        for idx, dim in enumerate(list_shape):
-            if isinstance(dim, Variable):
-                dim.stop_gradient = True
-                check_dtype(
-                    dim.dtype, 'shape[' + str(idx) + ']', ['int32', 'int64'],
-                    'fill_constant',
-                    '(When type of shape in fill_constant is list or tuple.)')
-                if convert_dtype(dim.dtype) == 'int64':
-                    dim = cast(x=dim, dtype='int32')
-                new_shape_tensor.append(dim)
-            else:
-                temp_out = helper.create_variable_for_type_inference('int32')
-                fill_constant([1], 'int32', dim, force_cpu=True, out=temp_out)
-                new_shape_tensor.append(temp_out)
-        return new_shape_tensor
-
-    if isinstance(shape, Variable):
-        shape.stop_gradient = True
-        check_dtype(shape.dtype, 'shape', ['int32', 'int64'], 'fill_constant',
-                    '(When type of shape in fill_constant is Variable.)')
-        if (convert_dtype(shape.dtype) == 'int64'):
-            shape = cast(shape, 'int32')
-        inputs["ShapeTensor"] = shape
-    elif isinstance(shape, (list, tuple)):
-        assert len(shape) > 0, (
-            "The size of 'shape' in fill_constant can't be zero, "
-            "but received %s." % len(shape))
-        attrs["shape"] = _get_attr_shape(shape)
-        if utils._contain_var(shape):
-            inputs['ShapeTensorList'] = _get_shape_tensor(shape)
+    inputs = utils._get_shape_tensor_inputs(
+        inputs=inputs,
+        helper=helper,
+        attrs=attrs,
+        shape=shape,
+        op_type='fill_constant')
 
     if out is None:
         out = helper.create_variable_for_type_inference(dtype=dtype)
......
@@ -18,6 +18,8 @@ import copy
 import six
 import numpy as np
 from ..framework import Variable
+from ..data_feeder import convert_dtype, check_variable_and_dtype, check_type, check_dtype
+from ..layer_helper import LayerHelper
 
 def convert_to_list(value, n, name, dtype=np.int):
@@ -274,3 +276,50 @@ def _contain_var(list_or_tuple):
         if isinstance(item, Variable):
             return True
     return False
+
+
+def _get_shape_tensor_inputs(inputs, helper, attrs, shape, op_type):
+    from .tensor import fill_constant, cast
+
+    def _get_attr_shape(list_shape):
+        attr_shape = []
+        for idx, dim in enumerate(list_shape):
+            if isinstance(dim, Variable):
+                attr_shape.append(-1)
+            else:
+                attr_shape.append(dim)
+        return attr_shape
+
+    def _get_shape_tensor(list_shape):
+        new_shape_tensor = []
+        for idx, dim in enumerate(list_shape):
+            if isinstance(dim, Variable):
+                dim.stop_gradient = True
+                check_dtype(
+                    dim.dtype, 'shape[' + str(idx) + ']', ['int32', 'int64'],
+                    op_type,
+                    '(When type of shape in ' + op_type + ' is list or tuple.)')
+                if convert_dtype(dim.dtype) == 'int64':
+                    dim = cast(x=dim, dtype='int32')
+                new_shape_tensor.append(dim)
+            else:
+                temp_out = fill_constant([1], 'int32', dim, force_cpu=True)
+                new_shape_tensor.append(temp_out)
+        return new_shape_tensor
+
+    if isinstance(shape, Variable):
+        shape.stop_gradient = True
+        check_dtype(shape.dtype, 'shape', ['int32', 'int64'], op_type,
+                    '(When type of shape in ' + op_type + ' is Variable.)')
+        if convert_dtype(shape.dtype) == 'int64':
+            shape = cast(shape, 'int32')
+        inputs["ShapeTensor"] = shape
+    elif isinstance(shape, (list, tuple)):
+        assert len(shape) > 0, (
+            "The size of 'shape' in " + op_type + " can't be zero, "
+            "but received %s." % len(shape))
+        attrs["shape"] = _get_attr_shape(shape)
+        if _contain_var(shape):
+            inputs['ShapeTensorList'] = _get_shape_tensor(shape)
+    return inputs
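To make the shared helper concrete: for a mixed shape such as `[2, dim_var, 3]` it records `attrs['shape'] = [2, -1, 3]` (Variable dims become -1) and fills `inputs['ShapeTensorList']` with per-dimension int32 tensors, while a whole-shape Variable lands in `inputs['ShapeTensor']` instead. A standalone sketch of that dispatch — plain Python, with `is_var` standing in for `isinstance(x, Variable)`:

```python
def sketch_shape_inputs(shape, is_var):
    # Simplified mirror of _get_shape_tensor_inputs; the real helper also
    # casts int64 dims to int32 and materializes constant dims as tensors.
    inputs, attrs = {}, {}
    if is_var(shape):                      # whole shape given as one 1-D tensor
        inputs['ShapeTensor'] = shape
    elif isinstance(shape, (list, tuple)):
        assert len(shape) > 0, "shape can't be empty"
        attrs['shape'] = [-1 if is_var(d) else d for d in shape]
        if any(is_var(d) for d in shape):  # per-dim tensors where needed
            inputs['ShapeTensorList'] = list(shape)
    return inputs, attrs

print(sketch_shape_inputs([2, 'VAR', 3], is_var=lambda d: d == 'VAR'))
# ({'ShapeTensorList': [2, 'VAR', 3]}, {'shape': [2, -1, 3]})
```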
@@ -212,6 +212,54 @@ class TestFillConstantOp1_ShapeTensor(OpTest):
         self.check_output()
 
 
+# Situation 4: value is a float32 tensor (ValueTensor overrides attr value)
+class TestFillConstantOp1_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with a tensor-specified value
+        '''
+        self.op_type = "fill_constant"
+        self.init_data()
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("float32")
+        }
+        self.attrs = {'value': self.value + 1.0}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3.8
+        self.dtype = np.float32
+
+    def test_check_output(self):
+        self.check_output()
+
+
+# Situation 5: value is an int32 tensor
+class TestFillConstantOp2_ValueTensor(OpTest):
+    def setUp(self):
+        '''Test fill_constant op with a tensor-specified value
+        '''
+        self.op_type = "fill_constant"
+        self.init_data()
+        self.inputs = {
+            "ShapeTensor": np.array(self.shape).astype("int32"),
+            'ValueTensor': np.array([self.value]).astype("int32")
+        }
+        self.attrs = {'value': self.value, 'dtype': 2}
+        self.outputs = {'Out': np.full(self.shape, self.value)}
+
+    def init_data(self):
+        self.shape = [123, 92]
+        self.value = 3
+        self.dtype = np.int32
+
+    def test_check_output(self):
+        self.check_output()
# Test python API
 class TestFillConstantAPI(unittest.TestCase):
     def test_api(self):
@@ -242,14 +290,18 @@ class TestFillConstantAPI(unittest.TestCase):
         out_6 = fluid.layers.fill_constant(
             shape=shape_tensor_int64, dtype=np.float32, value=1.1)
 
+        val = fluid.layers.fill_constant(shape=[1], dtype=np.float32, value=1.1)
+        out_7 = fluid.layers.fill_constant(
+            shape=shape_tensor_int64, dtype=np.float32, value=val)
+
         exe = fluid.Executor(place=fluid.CPUPlace())
-        res_1, res_2, res_3, res_4, res_5, res_6 = exe.run(
+        res_1, res_2, res_3, res_4, res_5, res_6, res_7 = exe.run(
             fluid.default_main_program(),
             feed={
                 "shape_tensor_int32": np.array([1, 2]).astype("int32"),
                 "shape_tensor_int64": np.array([1, 2]).astype("int64"),
             },
-            fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6])
+            fetch_list=[out_1, out_2, out_3, out_4, out_5, out_6, out_7])
 
         assert np.array_equal(res_1, np.full([1, 2], 1.1, dtype="float32"))
         assert np.array_equal(res_2, np.full([1, 2], 1.1, dtype="float32"))
@@ -257,6 +309,7 @@ class TestFillConstantAPI(unittest.TestCase):
         assert np.array_equal(res_4, np.full([1, 2], 1.1, dtype="float32"))
         assert np.array_equal(res_5, np.full([1, 2], 1.1, dtype="float32"))
         assert np.array_equal(res_6, np.full([1, 2], 1.1, dtype="float32"))
+        assert np.array_equal(res_7, np.full([1, 2], 1.1, dtype="float32"))
 
 class TestFillConstantOpError(unittest.TestCase):
......