Commit fd479631 authored by Yu Yang

Stabilize elementwise_mul by using double precision

Parent 5934aae4
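The numeric gradient checker estimates each gradient with a central difference, and in float32 the rounding noise of that estimate forced the elementwise_mul tests to accept a loose max_relative_error=0.1. This commit threads double-precision support through the pybind tensor bindings and the checker so the tests can run in float64 and drop that tolerance. A minimal NumPy-only sketch (the function and constants are illustrative, not from the patch) of how the noise floor moves:

```python
import numpy as np

# Central difference of f(x) = x * y at x = 0.7, y = 0.3; the exact
# derivative is y. Rounding error scales roughly as eps / delta, so
# float32 (eps ~ 1.2e-7) is far noisier than float64 (eps ~ 2.2e-16).
for dtype in (np.float32, np.float64):
    x, y, delta = dtype(0.7), dtype(0.3), dtype(0.005)
    est = ((x + delta) * y - (x - delta) * y) / delta / dtype(2)
    print("%s error: %.2e" % (dtype.__name__, abs(float(est) - float(y))))
```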
```diff
@@ -77,20 +77,18 @@ PYBIND11_PLUGIN(core) {
       })
       .def("set", PyCPUTensorSetFromArray<float>)
       .def("set", PyCPUTensorSetFromArray<int>)
+      .def("set", PyCPUTensorSetFromArray<double>)
 #ifndef PADDLE_ONLY_CPU
       .def("set", PyCUDATensorSetFromArray<float>)
       .def("set", PyCUDATensorSetFromArray<int>)
+      .def("set", PyCUDATensorSetFromArray<double>)
 #endif
       .def("shape", [](Tensor &self) { return vectorize(self.dims()); })
-      .def("set_float_element",
-           [](Tensor &self, size_t offset, float f) {
-             // TODO(yuyang18): Only support GPU now.
-             self.data<float>()[offset] = f;
-           })
-      .def("get_float_element", [](Tensor &self, size_t offset) -> float {
-        // TODO(yuyang18): Only support GPU now.
-        return self.data<float>()[offset];
-      });
+      .def("set_float_element", TensorSetElement<float>)
+      .def("get_float_element", TensorGetElement<float>)
+      .def("set_double_element", TensorSetElement<double>)
+      .def("get_double_element", TensorGetElement<double>)
+      .def("dtype", [](Tensor &self) { return ToDataType(self.type()); });
 
   py::class_<LoDTensor, Tensor>(m, "LoDTensor")
       .def_buffer(
```
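With these bindings in place, Python code can build and probe double tensors directly. A hypothetical usage sketch, assuming the paddle.v2.framework.core import path used by the tests and that core.Scope() is constructible from Python (note that, like the new __set_tensor__ helper below, it relies on set() inferring dims from the array with no set_dims() call):

```python
import numpy as np
import paddle.v2.framework.core as core

scope = core.Scope()
tensor = scope.new_var("x").get_tensor()
# The new PyCPUTensorSetFromArray<double> overload accepts float64 arrays.
tensor.set(np.random.rand(2, 3).astype(np.float64), core.CPUPlace())
assert tensor.dtype() == core.DataType.FP64  # new dtype() binding
tensor.set_double_element(0, 1.5)            # new CPU-only double accessors
assert tensor.get_double_element(0) == 1.5
```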
```diff
@@ -73,10 +73,23 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 };
 }  // namespace details
 inline py::buffer_info CastToPyBuffer(framework::Tensor &tensor) {
-  auto buffer_info = details::CastToPyBufferImpl<true, 0, float, int>()(tensor);
+  auto buffer_info =
+      details::CastToPyBufferImpl<true, 0, float, int, double>()(tensor);
   return buffer_info;
 }
 
+template <typename T>
+T TensorGetElement(framework::Tensor &self, size_t offset) {
+  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
+  return self.data<T>()[offset];
+}
+
+template <typename T>
+void TensorSetElement(framework::Tensor &self, size_t offset, T elem) {
+  PADDLE_ENFORCE(platform::is_cpu_place(self.place()));
+  self.data<T>()[offset] = elem;
+}
+
 template <typename T>
 void PyCPUTensorSetFromArray(
     framework::Tensor &self,
```
```diff
@@ -69,24 +69,27 @@ def set_input(scope, op, inputs, place):
 
 def set_output_grad(scope, op, outputs, place):
+    def __set_tensor__(name):
+        out_tensor = scope.find_var(name).get_tensor()
+        grad_tensor = scope.new_var(grad_var_name(name)).get_tensor()
+        out_dtype = out_tensor.dtype()
+        if out_dtype == core.DataType.FP64:
+            data = np.ones(out_tensor.shape(), dtype=np.float64)
+        elif out_dtype == core.DataType.FP32:
+            data = np.ones(out_tensor.shape(), dtype=np.float32)
+        else:
+            raise ValueError("Not supported data type " + str(out_dtype))
+        grad_tensor.set(data, place)
+
     for out_name, out_dup in Operator.get_op_outputs(op.type()):
         if out_name in outputs:
             if out_dup:
                 sub_out = outputs[out_name]
                 for sub_out_name, _ in sub_out:
-                    out_tensor = scope.find_var(sub_out_name).get_tensor()
-                    grad_tensor = scope.new_var(grad_var_name(
-                        sub_out_name)).get_tensor()
-                    grad_tensor.set_dims(out_tensor.shape())
-                    data = np.ones(out_tensor.shape(), dtype=np.float32)
-                    grad_tensor.set(data, place)
+                    __set_tensor__(sub_out_name)
             else:
-                out_tensor = scope.find_var(out_name).get_tensor()
-                grad_tensor = scope.new_var(grad_var_name(out_name)).get_tensor(
-                )
-                grad_tensor.set_dims(out_tensor.shape())
-                data = np.ones(out_tensor.shape(), dtype=np.float32)
-                grad_tensor.set(data, place)
 
 
 def get_numeric_gradient(scope,
@@ -96,7 +99,6 @@ def get_numeric_gradient(scope,
                          output_names,
                          delta=0.005,
                          in_place=False):
-
     set_input(scope, op, inputs, core.CPUPlace())
 
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
@@ -115,7 +117,29 @@ def get_numeric_gradient(scope,
     tensor_to_check = scope.find_var(input_to_check).get_tensor()
     tensor_size = product(tensor_to_check.get_dims())
-    gradient_flat = np.zeros(shape=(tensor_size, ), dtype='float32')
+    tensor_to_check_dtype = tensor_to_check.dtype()
+    if tensor_to_check_dtype == core.DataType.FP32:
+        tensor_to_check_dtype = np.float32
+    elif tensor_to_check_dtype == core.DataType.FP64:
+        tensor_to_check_dtype = np.float64
+    else:
+        raise ValueError("Not supported data type " + str(
+            tensor_to_check_dtype))
+
+    gradient_flat = np.zeros(shape=(tensor_size, ), dtype=tensor_to_check_dtype)
+
+    def __get_elem__(tensor, i):
+        if tensor_to_check_dtype == np.float32:
+            return tensor.get_float_element(i)
+        else:
+            return tensor.get_double_element(i)
+
+    def __set_elem__(tensor, i, e):
+        if tensor_to_check_dtype == np.float32:
+            tensor.set_float_element(i, e)
+        else:
+            tensor.set_double_element(i, e)
 
     # we only compute the gradient of one element each time.
     # we use a for loop to compute the gradient of every element.
     for i in xrange(tensor_size):
@@ -123,20 +147,20 @@ def get_numeric_gradient(scope,
             set_input(scope, op, inputs, core.CPUPlace())
 
         # get one input element through its index i.
-        origin = tensor_to_check.get_float_element(i)
+        origin = __get_elem__(tensor_to_check, i)
 
         # add delta to it, run op and then get the sum of the result tensor.
         x_pos = origin + delta
-        tensor_to_check.set_float_element(i, x_pos)
+        __set_elem__(tensor_to_check, i, x_pos)
         y_pos = get_output()
 
         if in_place:
             set_input(scope, op, inputs, core.CPUPlace())
 
         x_neg = origin - delta
-        tensor_to_check.set_float_element(i, x_neg)
+        __set_elem__(tensor_to_check, i, x_neg)
         y_neg = get_output()
 
-        tensor_to_check.set_float_element(i, origin)
+        __set_elem__(tensor_to_check, i, origin)
         gradient_flat[i] = (y_pos - y_neg) / delta / 2
 
     return gradient_flat.reshape(tensor_to_check.get_dims())
```
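get_numeric_gradient now dispatches on the tensor's dtype and accumulates in matching precision, but the estimator itself is unchanged: grad_i ~ (f(x_i + delta) - f(x_i - delta)) / (2 * delta). A standalone NumPy sketch of that loop (numeric_gradient and f are illustrative names, not framework API):

```python
import numpy as np

def numeric_gradient(f, x, delta=0.005):
    # Perturb one element at a time, exactly as get_numeric_gradient does
    # via __get_elem__/__set_elem__, and apply the central difference.
    x = np.array(x, dtype=np.float64)  # float64 is the point of this patch
    grad = np.zeros_like(x)
    flat_x, flat_g = x.ravel(), grad.ravel()
    for i in range(flat_x.size):
        origin = flat_x[i]
        flat_x[i] = origin + delta
        y_pos = f(x)
        flat_x[i] = origin - delta
        y_neg = f(x)
        flat_x[i] = origin             # restore before the next element
        flat_g[i] = (y_pos - y_neg) / delta / 2
    return grad

# d/dx of sum(x * y) is y: the elementwise_mul case.
y = np.array([0.3, 0.5, 0.7])
print(numeric_gradient(lambda x: (x * y).sum(), [1.0, 2.0, 3.0]))
```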
```diff
@@ -7,8 +7,8 @@ class ElementwiseMulOp(OpTest):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float32"),
-            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float32")
+            'X': np.random.uniform(0.1, 1, [13, 17]).astype("float64"),
+            'Y': np.random.uniform(0.1, 1, [13, 17]).astype("float64")
         }
         self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
@@ -16,23 +16,21 @@ class ElementwiseMulOp(OpTest):
         self.check_output()
 
     def test_check_grad_normal(self):
-        self.check_grad(['X', 'Y'], 'Out', max_relative_error=0.1)
+        self.check_grad(['X', 'Y'], 'Out')
 
     def test_check_grad_ingore_x(self):
-        self.check_grad(
-            ['Y'], 'Out', max_relative_error=0.1, no_grad_set=set("X"))
+        self.check_grad(['Y'], 'Out', no_grad_set=set("X"))
 
     def test_check_grad_ingore_y(self):
-        self.check_grad(
-            ['X'], 'Out', max_relative_error=0.1, no_grad_set=set('Y'))
+        self.check_grad(['X'], 'Out', no_grad_set=set('Y'))
 
 
 class TestElementwiseMulOp_Vector(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.random((32, )).astype("float32"),
-            'Y': np.random.random((32, )).astype("float32")
+            'X': np.random.random((32, )).astype("float64"),
+            'Y': np.random.random((32, )).astype("float64")
         }
         self.outputs = {'Out': np.multiply(self.inputs['X'], self.inputs['Y'])}
@@ -41,8 +39,8 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(2).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(2).astype(np.float64)
         }
 
         self.attrs = {'axis': 0}
@@ -55,8 +53,8 @@ class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(3).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(3).astype(np.float64)
         }
 
         self.attrs = {'axis': 1}
@@ -69,8 +67,8 @@ class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4).astype(np.float32),
-            'Y': np.random.rand(4).astype(np.float32)
+            'X': np.random.rand(2, 3, 4).astype(np.float64),
+            'Y': np.random.rand(4).astype(np.float64)
         }
 
         self.outputs = {
@@ -82,8 +80,8 @@ class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp):
     def setUp(self):
         self.op_type = "elementwise_mul"
         self.inputs = {
-            'X': np.random.rand(2, 3, 4, 5).astype(np.float32),
-            'Y': np.random.rand(3, 4).astype(np.float32)
+            'X': np.random.rand(2, 3, 4, 5).astype(np.float64),
+            'Y': np.random.rand(3, 4).astype(np.float64)
         }
 
         self.attrs = {'axis': 1}
```
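Dropping max_relative_error=0.1 is the payoff: check_grad now compares against float64 inputs, so the default tolerance suffices. A back-of-the-envelope look (values via np.finfo; illustrative only) at the rounding noise a central difference inherits at delta = 0.005:

```python
import numpy as np

# Cancellation in (y_pos - y_neg) loses roughly eps / delta of relative
# accuracy, which bounds how tight max_relative_error could ever be.
delta = 0.005
for dt in (np.float32, np.float64):
    eps = np.finfo(dt).eps
    print("%s: eps = %.1e, noise ~ eps / delta = %.1e"
          % (np.dtype(dt).name, eps, eps / delta))
# float32 lands near 2e-5 before any op-level error accumulates;
# float64 near 4e-14, comfortably below any sane tolerance.
```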