diff --git a/paddle/operators/cross_entropy_op.cc b/paddle/operators/cross_entropy_op.cc
index a865991db3111d2a7cec9f7731b3c34876864299..d94b96200c2a5cd112b17e45aa6cd4a63bdd04d0 100644
--- a/paddle/operators/cross_entropy_op.cc
+++ b/paddle/operators/cross_entropy_op.cc
@@ -162,6 +162,8 @@ or not. But the output only shares the LoD with input `X`.
 namespace ops = paddle::operators;
 REGISTER_OP(cross_entropy, ops::CrossEntropyOp, ops::CrossEntropyOpMaker,
             cross_entropy_grad, ops::CrossEntropyGradientOp);
-REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<float>);
+REGISTER_OP_CPU_KERNEL(cross_entropy, ops::CrossEntropyOpKernel<float>,
+                       ops::CrossEntropyOpKernel<double>);
 REGISTER_OP_CPU_KERNEL(cross_entropy_grad,
-                       ops::CrossEntropyGradientOpKernel<float>);
+                       ops::CrossEntropyGradientOpKernel<float>,
+                       ops::CrossEntropyGradientOpKernel<double>);
diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu
index c492dddb09a41e3731a211b4fa083e57ad780f42..5f8a6cd5ef6fbb554112085adc6b85ef8e765e86 100644
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@@ -108,6 +108,8 @@ class CrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OP_GPU_KERNEL(cross_entropy, ops::CrossEntropyOpCUDAKernel<float>);
+REGISTER_OP_GPU_KERNEL(cross_entropy, ops::CrossEntropyOpCUDAKernel<float>,
+                       ops::CrossEntropyOpCUDAKernel<double>);
 REGISTER_OP_GPU_KERNEL(cross_entropy_grad,
-                       ops::CrossEntropyGradientOpCUDAKernel<float>);
+                       ops::CrossEntropyGradientOpCUDAKernel<float>,
+                       ops::CrossEntropyGradientOpCUDAKernel<double>);
diff --git a/paddle/operators/math/cross_entropy.cc b/paddle/operators/math/cross_entropy.cc
index 150a65f2751aaeac17f9403404d2efd990a0c72b..cb28add3f01c321797b75230f45f19f8d403387a 100644
--- a/paddle/operators/math/cross_entropy.cc
+++ b/paddle/operators/math/cross_entropy.cc
@@ -54,6 +54,7 @@ class CrossEntropyFunctor<platform::CPUPlace, T> {
 };

 template class CrossEntropyFunctor<platform::CPUPlace, float>;
+template class CrossEntropyFunctor<platform::CPUPlace, double>;
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/operators/math/cross_entropy.cu b/paddle/operators/math/cross_entropy.cu
index db878129d650d663e187ecabb106eea0e39db6fa..80db130aa0900553db30ead8f2cd5b850f3df1e5 100644
--- a/paddle/operators/math/cross_entropy.cu
+++ b/paddle/operators/math/cross_entropy.cu
@@ -39,11 +39,36 @@ __device__ __forceinline__ T sum_single_warp(T val) {
   return val;
 }

+// CUDA does not support a dynamic shared-memory array of a template type;
+// see https://stackoverflow.com/questions/20497209
+template <typename T>
+struct SharedMemory {
+  // Ensure that we won't compile any unspecialized types.
+  __device__ T* GetPointer() { return NULL; }
+};
+
+template <>
+struct SharedMemory<float> {
+  __device__ float* GetPointer() {
+    extern __shared__ float s_float[];
+    return s_float;
+  }
+};
+
+template <>
+struct SharedMemory<double> {
+  __device__ double* GetPointer() {
+    extern __shared__ double s_double[];
+    return s_double;
+  }
+};
+
 template <typename T>
 __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
                                        const int class_num) {
   int tid = threadIdx.x;
-  extern __shared__ T d_sum[];
+  SharedMemory<T> d_sum_shared;
+  T* d_sum = d_sum_shared.GetPointer();
   d_sum[tid] = 0;

   int cur_idx = tid;
@@ -102,6 +127,7 @@ class CrossEntropyFunctor<platform::GPUPlace, T> {
 };

 template class CrossEntropyFunctor<platform::GPUPlace, float>;
+template class CrossEntropyFunctor<platform::GPUPlace, double>;
 }  // namespace math
 }  // namespace operators
 }  // namespace paddle
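Note for reviewers: the GPU-side change above only swaps how the shared-memory scratch buffer is obtained (the `SharedMemory<T>` specializations replace `extern __shared__ T d_sum[]`, which nvcc rejects once the kernel is instantiated for more than one type at the same shared-memory address); the per-row reduction itself is untouched. For reference, the quantity `SoftCrossEntropyKernel` accumulates per row, as a minimal NumPy sketch (illustrative only, not part of the patch):

```python
import numpy as np

def soft_cross_entropy(X, label):
    # Per-row reduction performed by SoftCrossEntropyKernel:
    #   Y[i] = -sum_j label[i][j] * log(X[i][j])
    return (-label * np.log(X)).sum(axis=1, keepdims=True)

# Uniform distributions over 4 classes: each row gives -log(0.25).
X = np.full((2, 4), 0.25)
label = np.full((2, 4), 0.25)
print(soft_cross_entropy(X, label))  # ~1.3863 per row
```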
diff --git a/python/paddle/v2/framework/tests/op_test.py b/python/paddle/v2/framework/tests/op_test.py
index a7de01dcddd65b6f0f064e6ce6fcb3e5cad73931..8fc61c9831efb684d72ee14a5243e8d9c2eceef0 100644
--- a/python/paddle/v2/framework/tests/op_test.py
+++ b/python/paddle/v2/framework/tests/op_test.py
@@ -8,6 +8,15 @@ from paddle.v2.framework.executor import Executor
 from paddle.v2.framework.framework import Program, OpProtoHolder


+def randomize_probability(batch_size, class_num, dtype='float32'):
+    prob = np.random.uniform(
+        0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
+    prob_sum = prob.sum(axis=1)
+    for i in xrange(len(prob)):
+        prob[i] /= prob_sum[i]
+    return prob
+
+
 def grad_var_name(var_name):
     return var_name + "@GRAD"

@@ -233,7 +242,7 @@
         if (var_name not in np_list) and var_proto.dispensable:
             continue
         assert (var_name in np_list) or (var_proto.dispensable), \
-            "Missing {} as input".format(var_name)
+               "Missing {} as input".format(var_name)
         if var_proto.duplicable:
             assert isinstance(np_list[var_name], list), \
                 "Duplicable {} should be set as list".format(var_name)
@@ -379,9 +388,9 @@
         def err_msg():
             offset = np.argmax(diff_mat > max_relative_error)
             return ("%s Variable %s max gradient diff %f over limit %f, "
-                    "the first error element is %d") % (
+                    "the first error element is %d, %f, %f") % (
                         msg_prefix, name, max_diff, max_relative_error,
-                        offset)
+                        offset, a.flatten()[offset], b.flatten()[offset])

         self.assertLessEqual(max_diff, max_relative_error, err_msg())

@@ -389,6 +398,7 @@ def check_grad(self,
                    inputs_to_check,
                    output_names,
                    no_grad_set=None,
+                   numeric_grad_delta=0.005,
                    in_place=False,
                    max_relative_error=0.005,
                    user_defined_grads=None):
@@ -411,6 +421,7 @@ def check_grad(self,
                 self.inputs,
                 input_to_check,
                 output_names,
+                delta=numeric_grad_delta,
                 in_place=in_place) for input_to_check in inputs_to_check
         ]
         grad_names = [
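The new `numeric_grad_delta` argument threads through to the numeric-gradient helper because `check_grad` compares an operator's analytic gradient against a central-difference estimate, and the step size sets the truncation/round-off trade-off. A simplified standalone sketch of that estimator (`numeric_grad` and `f` are illustrative names, not the framework's API; the real helper perturbs framework tensors in place):

```python
import numpy as np

def numeric_grad(f, x, delta=0.005):
    # Central differences: truncation error shrinks as O(delta^2),
    # but round-off error grows as O(eps / delta), so smaller deltas
    # only pay off when the computation runs in float64.
    grad = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + delta
        y_pos = f(x)
        x[idx] = orig - delta
        y_neg = f(x)
        x[idx] = orig  # restore before the next element
        grad[idx] = (y_pos - y_neg) / (2.0 * delta)
    return grad

# f(x) = sum(-log(x)) has analytic gradient -1/x, the same shape of
# gradient the cross_entropy tests exercise.
x = np.array([0.2, 0.5, 0.9], dtype='float64')
approx = numeric_grad(lambda v: np.sum(-np.log(v)), x, delta=0.001)
print(np.max(np.abs(approx + 1.0 / x)))  # ~4e-5, well under 0.005
```

Shrinking the delta from 0.005 to 0.001 cuts the truncation term by 25x, which is what lets the tests below keep their relative-error tolerances while checking a gradient as steep as -1/x.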
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index e1c45c2674ee9cc7c7240bdd67de05cb218ac287..6f28ce723a88246724f96a4a931e9d57ed0550db 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -1,6 +1,6 @@
 import unittest
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, randomize_probability


 class TestCrossEntropyOp1(OpTest):
@@ -12,12 +12,12 @@
         batch_size = 30
         class_num = 10

-        X = np.random.uniform(0.1, 1.0,
-                              [batch_size, class_num]).astype("float32")
+        X = randomize_probability(batch_size, class_num, dtype='float64')
+
         label = np.random.randint(0, class_num, (batch_size, 1), dtype="int32")
         cross_entropy = np.asmatrix(
             [[-np.log(X[i][label[i][0]])] for i in range(X.shape[0])],
-            dtype="float32")
+            dtype="float64")

         self.inputs = {"X": X, "Label": label}
         self.outputs = {"Y": cross_entropy}
@@ -27,7 +27,7 @@
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(["X"], "Y")
+        self.check_grad(["X"], "Y", numeric_grad_delta=0.001)


 class TestCrossEntropyOp2(OpTest):
@@ -39,8 +39,7 @@
         batch_size = 5
         class_num = 37

-        X = np.random.uniform(0.1, 1.0,
-                              [batch_size, class_num]).astype("float32")
+        X = randomize_probability(batch_size, class_num)
         label = np.random.uniform(0.1, 1.0,
                                   [batch_size, class_num]).astype("float32")
         label /= label.sum(axis=1, keepdims=True)
@@ -55,7 +54,8 @@
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(["X"], "Y", max_relative_error=0.05)
+        self.check_grad(
+            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)


 class TestCrossEntropyOp3(OpTest):
@@ -67,8 +67,7 @@
         batch_size = 5
         class_num = 17

-        X = np.random.uniform(0.1, 1.0,
-                              [batch_size, class_num]).astype("float32")
+        X = randomize_probability(batch_size, class_num)
         label_index = np.random.randint(
             0, class_num, (batch_size), dtype="int32")
         label = np.zeros(X.shape)
@@ -88,7 +87,8 @@
         self.check_output()

     def test_check_grad(self):
-        self.check_grad(["X"], "Y", max_relative_error=0.05)
+        self.check_grad(
+            ["X"], "Y", max_relative_error=0.05, numeric_grad_delta=0.001)


 if __name__ == "__main__":
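Two test-side choices work together here: `randomize_probability` makes each row of `X` a proper distribution whose entries are bounded below (every normalized entry is at least `0.1 / class_num`), so the analytic gradient `-1 / X[i][label[i]]` never blows up, and moving TestCrossEntropyOp1 to float64 gives the tighter `numeric_grad_delta=0.001` room to pay off. A quick standalone check of those properties (a vectorized restatement of the helper; the patch's row-by-row loop is equivalent):

```python
import numpy as np

def randomize_probability(batch_size, class_num, dtype='float32'):
    # Same construction as the helper added to op_test.py, written
    # with keepdims instead of an explicit loop over rows.
    prob = np.random.uniform(
        0.1, 1.0, size=(batch_size, class_num)).astype(dtype)
    return prob / prob.sum(axis=1, keepdims=True)

X = randomize_probability(30, 10, dtype='float64')
label = np.random.randint(0, 10, (30, 1), dtype='int32')
Y = np.array([[-np.log(X[i][label[i][0]])] for i in range(30)])

assert np.allclose(X.sum(axis=1), 1.0)  # rows are distributions
assert X.min() >= 0.1 / 10              # entries bounded below
assert np.isfinite(-1.0 / X).all()      # analytic gradient stays bounded
print(Y.mean())
```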