Commit 01d9c465 authored by Yu Yang, committed by GitHub

Merge pull request #4478 from reyoung/stable_elemwise_mul

Stable elemwise mul
@@ -77,20 +77,18 @@ PYBIND11_PLUGIN(core) {
       })
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
+      .def("set", PyCPUTensorSetFromArray<double>)
 #ifndef PADDLE_ONLY_CPU
       .def("set", PyCUDATensorSetFromArray<float>)
       .def("set", PyCUDATensorSetFromArray<int>)
+      .def("set", PyCUDATensorSetFromArray<double>)
 #endif
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
-      .def("set_float_element",
-           [](Tensor &self, size_t offset, float f) {
-             // TODO(yuyang18): Only support GPU now.
-             self.data<float>()[offset] = f;
-           })
-      .def("get_float_element", [](Tensor &self, size_t offset) -> float {
-        // TODO(yuyang18): Only support GPU now.
-        return self.data<float>()[offset];
-      });
+      .def("set_float_element", TensorSetElement<float>)
+      .def("get_float_element", TensorGetElement<float>)
+      .def("set_double_element", TensorSetElement<double>)
+      .def("get_double_element", TensorGetElement<double>)
+      .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });
 
   py::class_<LoDTensor, Tensor>(m, "LoDTensor")
       .def_buffer(
......
@@ -73,10 +73,23 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 };
 }  // namespace details
 inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
-  auto buffer_info = details::CastToPyBufferImpl<true, 0, float, int>()(tensor);
+  auto buffer_info =
+      details::CastToPyBufferImpl<true, 0, float, int, double>()(tensor);
   return buffer_info;
 }
 
+template <typename T>
+T TensorGetElement(framework::Tensor &self, size_t offset) {
+  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
+  return self.data<T>()[offset];
+}
+
+template <typename T>
+void TensorSetElement(framework::Tensor &self, size_t offset, T elem) {
+  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
+  self.data<T>()[offset] = elem;
+}
+
 template <typename T>
 void PyCPUTensorSetFromArray(
     framework::Tensor &self,
......
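Taken together, the two C++ hunks above let Python read and write single tensor elements in either precision: `set` dispatches on the numpy array's dtype across the float/int/double overloads, and the new `dtype()` binding reports which `core.DataType` a tensor holds. A minimal usage sketch, assuming the `paddle.v2.framework.core` import path used by the test suite:

    import numpy as np
    import paddle.v2.framework.core as core  # import path assumed from the tests

    scope = core.Scope()
    place = core.CPUPlace()

    tensor = scope.new_var("x").get_tensor()
    tensor.set_dims([2, 3])
    tensor.set(np.random.rand(2, 3).astype(np.float64), place)  # double overload

    assert tensor.dtype() == core.DataType.FP64
    old = tensor.get_double_element(0)       # reads data<double>()[0] on CPU
    tensor.set_double_element(0, old + 1.0)  # PADDLE_ENFORCE rejects GPU tensors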
@@ -12,17 +12,19 @@ def grad_var_name(var_name):
 def create_op(scope, op_type, inputs, outputs, attrs):
     kwargs = dict()
 
+    def __create_var__(name, var_name):
+        scope.new_var(var_name)
+        kwargs[name].append(var_name)
+
     for in_name, in_dup in Operator.get_op_inputs(op_type):
         if in_name in inputs:
             kwargs[in_name] = []
             if in_dup:
                 sub_in = inputs[in_name]
                 for sub_in_name, _ in sub_in:
-                    var = scope.new_var(sub_in_name)
-                    kwargs[in_name].append(sub_in_name)
+                    __create_var__(in_name, sub_in_name)
             else:
-                var = scope.new_var(in_name)
-                kwargs[in_name].append(in_name)
+                __create_var__(in_name, in_name)
 
     for out_name, out_dup in Operator.get_op_outputs(op_type):
         if out_name in outputs:
@@ -30,11 +32,9 @@ def create_op(scope, op_type, inputs, outputs, attrs):
             if out_dup:
                 sub_out = outputs[out_name]
                 for sub_out_name, _ in sub_out:
-                    var = scope.new_var(sub_out_name)
-                    kwargs[out_name].append(sub_out_name)
+                    __create_var__(out_name, sub_out_name)
             else:
-                var = scope.new_var(out_name)
-                kwargs[out_name].append(out_name)
+                __create_var__(out_name, out_name)
 
     for attr_name in Operator.get_op_attr_names(op_type):
         if attr_name in attrs:
@@ -44,49 +44,46 @@ def create_op(scope, op_type, inputs, outputs, attrs):
 def set_input(scope, op, inputs, place):
+    def __set_input__(var_name, var):
+        tensor = scope.find_var(var_name).get_tensor()
+        if isinstance(var, tuple):
+            tensor.set_lod(var[1])
+            var = var[0]
+        tensor.set_dims(var.shape)
+        tensor.set(var, place)
+
     for in_name, in_dup in Operator.get_op_inputs(op.type()):
         if in_name in inputs:
             if in_dup:
                 sub_in = inputs[in_name]
                 for sub_in_name, sub_in_val in sub_in:
-                    var = scope.find_var(sub_in_name)
-                    tensor = var.get_tensor()
-                    sub_in_array = sub_in_val[0] \
-                        if isinstance(sub_in_val, tuple) else sub_in_val
-                    tensor.set_dims(sub_in_array.shape)
-                    tensor.set(sub_in_array, place)
-                    if isinstance(sub_in_val, tuple):
-                        tensor.set_lod(sub_in_val[1])
+                    __set_input__(sub_in_name, sub_in_val)
             else:
-                var = scope.find_var(in_name)
-                tensor = var.get_tensor()
-                in_val = inputs[in_name]
-                in_array = in_val[0] if isinstance(in_val, tuple) else in_val
-                tensor.set_dims(in_array.shape)
-                tensor.set(in_array, place)
-                if isinstance(in_val, tuple):
-                    tensor.set_lod(in_val[1])
+                __set_input__(in_name, inputs[in_name])
 
 
 def set_output_grad(scope, op, outputs, place):
+    def __set_tensor__(name):
+        out_tensor = scope.find_var(name).get_tensor()
+        grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+        out_dtype = out_tensor.dtype()
+        if out_dtype == core.DataType.FP64:
+            data = np.ones(out_tensor.shape(), dtype=np.float64)
+        elif out_dtype == core.DataType.FP32:
+            data = np.ones(out_tensor.shape(), dtype=np.float32)
+        else:
+            raise ValueError("Not supported data type " + str(out_dtype))
+
+        grad_tensor.set(data, place)
+
     for out_name, out_dup in Operator.get_op_outputs(op.type()):
         if out_name in outputs:
             if out_dup:
                 sub_out = outputs[out_name]
                 for sub_out_name, _ in sub_out:
-                    out_tensor = scope.find_var(sub_out_name).get_tensor()
-                    grad_tensor = scope.new_var(grad_var_name(
-                        sub_out_name)).get_tensor()
-                    grad_tensor.set_dims(out_tensor.shape())
-                    data = np.ones(out_tensor.shape(), dtype=np.float32)
-                    grad_tensor.set(data, place)
+                    __set_tensor__(sub_out_name)
             else:
-                out_tensor = scope.find_var(out_name).get_tensor()
-                grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor(
-                )
-                grad_tensor.set_dims(out_tensor.shape())
-                data = np.ones(out_tensor.shape(), dtype=np.float32)
-                grad_tensor.set(data, place)
+                __set_tensor__(out_name)
 
 
 def get_numeric_gradient(scope,
@@ -96,7 +93,6 @@ def get_numeric_gradient(scope,
                          output_names,
                          delta=0.005,
                          in_place=False):
-
     set_input(scope, op, inputs, core.CPUPlace())
 
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
@@ -115,7 +111,29 @@ def get_numeric_gradient(scope,
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
-    gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32')
+    tensor_to_check_dtype = tensor_to_check.dtype()
+    if tensor_to_check_dtype == core.DataType.FP32:
+        tensor_to_check_dtype = np.float32
+    elif tensor_to_check_dtype == core.DataType.FP64:
+        tensor_to_check_dtype = np.float64
+    else:
+        raise ValueError("Not supported data type " + str(
+            tensor_to_check_dtype))
+
+    gradient_flat = np.zeros(shape=(tensor_size, ), dtype=tensor_to_check_dtype)
+
+    def __get_elem__(tensor, i):
+        if tensor_to_check_dtype == np.float32:
+            return tensor.get_float_element(i)
+        else:
+            return tensor.get_double_element(i)
+
+    def __set_elem__(tensor, i, e):
+        if tensor_to_check_dtype == np.float32:
+            tensor.set_float_element(i, e)
+        else:
+            tensor.set_double_element(i, e)
+
     # we only compute gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
@@ -123,20 +141,20 @@ def get_numeric_gradient(scope,
         set_input(scope, op, inputs, core.CPUPlace())
 
         # get one input element throw it's index i.
-        origin = tensor_to_check.get_float_element(i)
+        origin = __get_elem__(tensor_to_check, i)
         # add delta to it, run op and then get the sum of the result tensor.
         x_pos = origin + delta
-        tensor_to_check.set_float_element(i, x_pos)
+        __set_elem__(tensor_to_check, i, x_pos)
         y_pos = get_output()
 
         if in_place:
             set_input(scope, op, inputs, core.CPUPlace())
 
         x_neg = origin - delta
-        tensor_to_check.set_float_element(i, x_neg)
+        __set_elem__(tensor_to_check, i, x_neg)
         y_neg = get_output()
 
-        tensor_to_check.set_float_element(i, origin)
+        __set_elem__(tensor_to_check, i, origin)
         gradient_flat[i] = (y_pos - y_neg) / delta / 2
 
     return gradient_flat.reshape(tensor_to_check.get_dims())
......
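The loop above is plain central differencing: each element x_i is perturbed by ±delta and the partial derivative is estimated as (f(x + delta·e_i) - f(x - delta·e_i)) / (2·delta). A self-contained numpy sketch of the same scheme (the `numeric_grad` helper is hypothetical, not part of the framework), checked against the analytic gradient of f(x) = sum(x²):

    import numpy as np

    def numeric_grad(f, x, delta=0.005):
        # Central difference: df/dx_i ~= (f(x + delta*e_i) - f(x - delta*e_i)) / (2*delta).
        grad = np.zeros_like(x)
        flat_x = x.reshape(-1)       # view onto x, so the writes below perturb x itself
        flat_g = grad.reshape(-1)
        for i in range(flat_x.size):
            origin = flat_x[i]
            flat_x[i] = origin + delta
            y_pos = f(x)
            flat_x[i] = origin - delta
            y_neg = f(x)
            flat_x[i] = origin       # restore before moving to the next element
            flat_g[i] = (y_pos - y_neg) / delta / 2
        return grad

    x = np.random.rand(3, 4)
    g = numeric_grad(lambda t: (t * t).sum(), x)  # analytic gradient is 2*x
    assert np.allclose(g, 2 * x, atol=1e-6)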
@@ -80,7 +80,7 @@ class TestCrossEntropyOp3(OpTest):
         cross_entropy2 = (-label * np.log(X)).sum(
             axis=1, keepdims=True).astype("float32")
 
-        self.inputs = {"X": X, "Label": label}
+        self.inputs = {"X": X, "Label": label.astype(np.float32)}
         self.outputs = {"Y": cross_entropy}
         self.attrs = {"softLabel": True}
......
@@ -7,8 +7,8 @@ class ElementwiseMulOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float64"),
+            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float64")
         }
         self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
@@ -16,23 +16,21 @@ class ElementwiseMulOp(OpTest):
         self.check_output()
 
     def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
+        self.check_grad(['X', 'Y'], 'Out')
 
     def test_check_grad_ingore_x(self):
-        self.check_grad(
-            ['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
+        self.check_grad(['Y'], 'Out', no_grad_set=set("X"))
 
     def test_check_grad_ingore_y(self):
-        self.check_grad(
-            ['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
+        self.check_grad(['X'], 'Out', no_grad_set=set('Y'))
 
 
 class TestElementwiseMulOp_Vector(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.random((32, )).astype("float32"),
-            'Y': np.random.random((32, )).astype("float32")
+            'X': np.random.random((32, )).astype("float64"),
+            'Y': np.random.random((32, )).astype("float64")
         }
         self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
@@ -41,8 +39,8 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(2).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(2).astype(np.float64)
         }
 
         self.attrs = {'axis': 0}
@@ -55,8 +53,8 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(3).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(3).astype(np.float64)
         }
 
         self.attrs = {'axis': 1}
@@ -69,8 +67,8 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(4).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(4).astype(np.float64)
         }
 
         self.outputs = {
@@ -82,8 +80,8 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
-            'Y': np.random.rand(3, 4).astype(np.float32)
+            'X': np.random.rand(2, 3, 4, 5).astype(np.float64),
+            'Y': np.random.rand(3, 4).astype(np.float64)
         }
 
         self.attrs = {'axis': 1}
......
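Switching these tests from float32 to float64 is what makes the loosened max_relative_error=0.1 tolerance unnecessary: with delta=0.005, float32 rounding in f(x ± delta) already consumes several of its ~7 significant digits, while in float64 the O(delta²) truncation error dominates. A small illustration of the noise difference (the `central_diff` helper is hypothetical, not part of the test suite):

    import numpy as np

    def central_diff(f, x, i, delta=0.005):
        # One-element central difference, mirroring get_numeric_gradient above.
        origin = x[i]
        x[i] = origin + delta
        y_pos = f(x)
        x[i] = origin - delta
        y_neg = f(x)
        x[i] = origin
        return (y_pos - y_neg) / delta / 2

    for dtype in (np.float32, np.float64):
        x = np.random.uniform(0.1, 1, 13 * 17).astype(dtype)
        y = np.random.uniform(0.1, 1, 13 * 17).astype(dtype)
        f = lambda t: (t * y).sum()            # d/dx_0 of sum(x * y) is y[0]
        grad = central_diff(f, x, 0)
        rel_err = abs(grad - y[0]) / abs(y[0])
        print(dtype.__name__, rel_err)         # float32 error is orders of magnitude larger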
@@ -17,7 +17,7 @@ class PReluTest(OpTest):
         x_np_sign = np.sign(x_np)
         x_np = x_np_sign * np.maximum(x_np, .005)
-        alpha_np = np.array([.1])
+        alpha_np = np.array([.1], dtype="float32")
         self.inputs = {'X': x_np, 'Alpha': alpha_np}
         out_np = np.maximum(self.inputs['X'], 0.)
         out_np = out_np + np.minimum(self.inputs['X'],
......