提交 26475cd9 编写于 作者: D dangqingqing

Use clipping log in cuda kernel, making it same with CPU.

上级 6f7a8260
...@@ -20,6 +20,21 @@ namespace operators { ...@@ -20,6 +20,21 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename T>
struct clipping_log {
__host__ __device__ T operator()(const T x) {
PADDLE_ASSERT(std::is_floating_point<T>::value);
const T kApproInf = 1e20;
if (x == INFINITY) {
return kApproInf;
}
if (x == -INFINITY) {
return -kApproInf;
}
return x;
}
};
template <typename T> template <typename T>
__global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
const int N, const int D) { const int N, const int D) {
...@@ -28,10 +43,11 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label, ...@@ -28,10 +43,11 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
i += blockDim.x * gridDim.x) { i += blockDim.x * gridDim.x) {
PADDLE_ASSERT(label[i] >= 0 && label[i] < D); PADDLE_ASSERT(label[i] >= 0 && label[i] < D);
Y[i] = -log(X[i * D + label[i]]); Y[i] = -clipping_log<T>()(X[i * D + label[i]]);
} }
} }
// TODO(qingqing): make zero setting an common function.
template <typename T> template <typename T>
__global__ void zero(T* X, const int N) { __global__ void zero(T* X, const int N) {
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N; for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
...@@ -98,7 +114,6 @@ class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel { ...@@ -98,7 +114,6 @@ class OnehotCrossEntropyGradientOpCUDAKernel : public framework::OpKernel {
int D = X->dims()[1]; int D = X->dims()[1];
int block = 512; int block = 512;
int grid = (N * D + block - 1) / block; int grid = (N * D + block - 1) / block;
// TODO(qingqing): make zero an common function.
zero<T><<<grid, block>>>(dXdata, N * D); zero<T><<<grid, block>>>(dXdata, N * D);
grid = (N + block - 1) / block; grid = (N + block - 1) / block;
......
...@@ -21,7 +21,7 @@ namespace operators { ...@@ -21,7 +21,7 @@ namespace operators {
using Tensor = framework::Tensor; using Tensor = framework::Tensor;
template <typename T> template <typename T>
T tolerable_value(T x) { T tolerable_value(const T x) {
static_assert(std::is_floating_point<T>::value, static_assert(std::is_floating_point<T>::value,
"tolerable_value works only on float, " "tolerable_value works only on float, "
"double and double double."); "double and double double.");
...@@ -85,6 +85,7 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel { ...@@ -85,6 +85,7 @@ class OnehotCrossEntropyGradientOpKernel : public framework::OpKernel {
const int batch_size = X->dims()[0]; const int batch_size = X->dims()[0];
const int class_num = X->dims()[1]; const int class_num = X->dims()[1];
// TODO(qingqing): make zero setting an common function.
memset(dXdata, 0, sizeof(T) * batch_size * class_num); memset(dXdata, 0, sizeof(T) * batch_size * class_num);
for (int i = 0; i < batch_size; ++i) { for (int i = 0; i < batch_size; ++i) {
int index = i * class_num + label_data[i]; int index = i * class_num + label_data[i];
......
...@@ -64,7 +64,8 @@ class OpTestMeta(type): ...@@ -64,7 +64,8 @@ class OpTestMeta(type):
actual = numpy.array(scope.find_var(out_name).get_tensor()) actual = numpy.array(scope.find_var(out_name).get_tensor())
expect = self.outputs[out_name] expect = self.outputs[out_name]
self.assertTrue( self.assertTrue(
numpy.allclose(actual, expect), numpy.allclose(
actual, expect, atol=1e-04),
"output name: " + out_name + "has diff") "output name: " + out_name + "has diff")
obj.test_all = test_all obj.test_all = test_all
......
...@@ -8,9 +8,8 @@ class TestCrossEntropy(unittest.TestCase): ...@@ -8,9 +8,8 @@ class TestCrossEntropy(unittest.TestCase):
__metaclass__ = OpTestMeta __metaclass__ = OpTestMeta
def setUp(self): def setUp(self):
# TODO this unit test is not passed
self.type = "onehot_cross_entropy" self.type = "onehot_cross_entropy"
batch_size = 100 batch_size = 30
class_num = 10 class_num = 10
X = numpy.random.random((batch_size, class_num)).astype("float32") X = numpy.random.random((batch_size, class_num)).astype("float32")
label = 5 * numpy.ones(batch_size).astype("int32") label = 5 * numpy.ones(batch_size).astype("int32")
...@@ -24,7 +23,7 @@ class TestCrossEntropy(unittest.TestCase): ...@@ -24,7 +23,7 @@ class TestCrossEntropy(unittest.TestCase):
class CrossEntropyGradOpTest(GradientChecker): class CrossEntropyGradOpTest(GradientChecker):
def test_check_grad(self): def test_check_grad(self):
op = create_op("onehot_cross_entropy") op = create_op("onehot_cross_entropy")
batch_size = 100 batch_size = 30
class_num = 10 class_num = 10
inputs = { inputs = {
"X": numpy.random.uniform( "X": numpy.random.uniform(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册