From a3a8a0900df325809920c2ffcbe3e5afdaa27dbe Mon Sep 17 00:00:00 2001
From: caoying03
Date: Wed, 20 Sep 2017 23:19:19 +0800
Subject: [PATCH] optimize cross entropy kernel by using reduce.

---
 paddle/operators/cross_entropy_op.cu          | 36 ++++++++++++++-----
 .../framework/tests/test_cross_entropy_op.py  | 27 +++++++-------
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/paddle/operators/cross_entropy_op.cu b/paddle/operators/cross_entropy_op.cu
index ab6ad0e0622..4b38af7dc32 100644
--- a/paddle/operators/cross_entropy_op.cu
+++ b/paddle/operators/cross_entropy_op.cu
@@ -32,16 +32,33 @@ __global__ void CrossEntropyKernel(T* Y, const T* X, const int* label,
   }
 }
 
-template <typename T>
+template <typename T, int blockSize>
 __global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
                                        const int N, const int D) {
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < N;
-       i += blockDim.x * gridDim.x) {
-    T sum = static_cast<T>(0);
-    for (int j = 0; j < D; j++) {
-      sum += label[i * D + j] * tolerable_value(log(X[i * D + j]));
+  int tid = threadIdx.x;
+  __shared__ T d_sum[blockSize];
+  int next_idx = blockIdx.x * D + tid;
+
+  d_sum[tid] = 0;
+  int cur_idx = tid;
+  while (cur_idx < D) {
+    d_sum[tid] += tolerable_value(std::log(X[next_idx])) * label[next_idx];
+    next_idx += blockSize;
+    cur_idx += blockSize;
+  }
+  __syncthreads();
+
+  for (int stride = blockSize >> 1; stride > 0; stride >>= 1) {
+    __syncthreads();
+    if (tid < stride) {
+      next_idx = tid + stride;
+      d_sum[tid] += d_sum[next_idx];
     }
-    Y[i] = -sum;
+  }
+  __syncthreads();
+
+  if (tid == 0) {
+    Y[blockIdx.x] = -d_sum[0];
   }
 }
 
@@ -104,8 +121,9 @@ class CrossEntropyOpCUDAKernel : public framework::OpKernel {
     // base on ExecutionContext.
     if (ctx.Attr<int>("soft_label") == 1) {
       auto* label_data = ctx.Input<Tensor>("Label")->data<T>();
-      SoftCrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n,
-                                                 d);
+      grid = n;
+      SoftCrossEntropyKernel<T, 512><<<grid, block>>>(y_data, x_data,
+                                                      label_data, n, d);
     } else {
       auto* label_data = ctx.Input<Tensor>("Label")->data<int>();
       CrossEntropyKernel<T><<<grid, block>>>(y_data, x_data, label_data, n, d);
diff --git a/python/paddle/v2/framework/tests/test_cross_entropy_op.py b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
index 0206ca064be..5cccfc6ade0 100644
--- a/python/paddle/v2/framework/tests/test_cross_entropy_op.py
+++ b/python/paddle/v2/framework/tests/test_cross_entropy_op.py
@@ -19,7 +19,7 @@ class TestCrossEntropyOp1(OpTest):
             dtype="float32")
         self.inputs = {"X": X, "Label": label}
         self.outputs = {"Y": cross_entropy}
-        self.attrs = {'soft_label': 0}
+        self.attrs = {"soft_label": 0}
 
     def test_check_output(self):
         self.check_output()
@@ -34,8 +34,8 @@ class TestCrossEntropyOp2(OpTest):
 
     def setUp(self):
         self.op_type = "cross_entropy"
-        batch_size = 10
-        class_num = 5
+        batch_size = 13
+        class_num = 37
         X = np.random.uniform(0.1, 1.0,
                               [batch_size, class_num]).astype("float32")
         label = np.random.uniform(0.1, 1.0,
@@ -43,15 +43,16 @@ class TestCrossEntropyOp2(OpTest):
         label /= label.sum(axis=1, keepdims=True)
         cross_entropy = (-label * np.log(X)).sum(
             axis=1, keepdims=True).astype("float32")
-        self.inputs = {'X': X, 'Label': label}
-        self.outputs = {'Y': cross_entropy}
-        self.attrs = {'soft_label': 1}
+
+        self.inputs = {"X": X, "Label": label}
+        self.outputs = {"Y": cross_entropy}
+        self.attrs = {"soft_label": 1}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Y')
+        self.check_grad(["X"], "Y", max_relative_error=0.05)
 
 
 class TestCrossEntropyOp3(OpTest):
@@ -61,8 +62,8 @@ class TestCrossEntropyOp3(OpTest):
 
     def setUp(self):
         self.op_type = "cross_entropy"
-        batch_size = 30
-        class_num = 10
+        batch_size = 13
+        class_num = 37
         X = np.random.uniform(0.1, 1.0,
                               [batch_size, class_num]).astype("float32")
         label_index = np.random.randint(
@@ -74,15 +75,15 @@ class TestCrossEntropyOp3(OpTest):
             dtype="float32")
         cross_entropy2 = (-label * np.log(X)).sum(
             axis=1, keepdims=True).astype("float32")
-        self.inputs = {'X': X, 'Label': label}
-        self.outputs = {'Y': cross_entropy}
-        self.attrs = {'soft_label': 1}
+        self.inputs = {"X": X, "Label": label}
+        self.outputs = {"Y": cross_entropy}
+        self.attrs = {"soft_label": 1}
 
     def test_check_output(self):
         self.check_output()
 
     def test_check_grad(self):
-        self.check_grad(['X'], 'Y')
+        self.check_grad(["X"], "Y", max_relative_error=0.05)
 
 
 if __name__ == "__main__":
--
GitLab
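
Reviewer note (not part of the patch): the new kernel assigns one CUDA block
per sample, so the per-sample sum over the D classes becomes a block-local
shared-memory tree reduction and needs no atomics or global synchronization.
Below is a minimal standalone sketch for exercising that reduction in
isolation. The kernel body is copied from the patch; main(), the 512-thread
block size, and the tolerable_value clipping bounds are assumptions made for
this sketch, not part of the commit.

// soft_ce_sketch.cu -- build with: nvcc -o soft_ce_sketch soft_ce_sketch.cu
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <vector>

// Stand-in for the helper the operator defines elsewhere: clips the
// infinities produced by log(0) to large finite values (bounds assumed).
template <typename T>
__host__ __device__ T tolerable_value(const T x) {
  const T kApproInf = 1e20;
  if (x == INFINITY) return kApproInf;
  if (x == -INFINITY) return -kApproInf;
  return x;
}

// Kernel from the patch. Block b reduces row b of X/label into Y[b]: each
// thread accumulates a blockSize-strided partial sum over the D classes,
// then the partial sums are combined pairwise in shared memory.
template <typename T, int blockSize>
__global__ void SoftCrossEntropyKernel(T* Y, const T* X, const T* label,
                                       const int N, const int D) {
  int tid = threadIdx.x;
  __shared__ T d_sum[blockSize];
  int next_idx = blockIdx.x * D + tid;

  d_sum[tid] = 0;
  int cur_idx = tid;
  while (cur_idx < D) {
    d_sum[tid] += tolerable_value(std::log(X[next_idx])) * label[next_idx];
    next_idx += blockSize;
    cur_idx += blockSize;
  }
  __syncthreads();

  for (int stride = blockSize >> 1; stride > 0; stride >>= 1) {
    __syncthreads();
    if (tid < stride) {
      next_idx = tid + stride;
      d_sum[tid] += d_sum[next_idx];
    }
  }
  __syncthreads();

  if (tid == 0) {
    Y[blockIdx.x] = -d_sum[0];
  }
}

int main() {
  const int n = 13, d = 37;        // same shapes as the updated tests
  constexpr int kBlockSize = 512;  // must match the template argument

  // Predictions in (0.1, 1.0] and row-stochastic soft labels, plus a CPU
  // reference of -sum_j label[i][j] * log(X[i][j]) per sample.
  std::vector<float> x(n * d), lab(n * d), y(n), ref(n, 0.f);
  srand(0);
  for (int i = 0; i < n; ++i) {
    float lsum = 0.f;
    for (int j = 0; j < d; ++j) {
      x[i * d + j] = 0.1f + 0.9f * rand() / float(RAND_MAX);
      lab[i * d + j] = rand() / float(RAND_MAX);
      lsum += lab[i * d + j];
    }
    for (int j = 0; j < d; ++j) {
      lab[i * d + j] /= lsum;
      ref[i] -= lab[i * d + j] * tolerable_value(std::log(x[i * d + j]));
    }
  }

  float *d_x, *d_l, *d_y;
  cudaMalloc(&d_x, n * d * sizeof(float));
  cudaMalloc(&d_l, n * d * sizeof(float));
  cudaMalloc(&d_y, n * sizeof(float));
  cudaMemcpy(d_x, x.data(), n * d * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_l, lab.data(), n * d * sizeof(float), cudaMemcpyHostToDevice);

  // One block per sample: the grid dimension must be n (the batch size).
  SoftCrossEntropyKernel<float, kBlockSize><<<n, kBlockSize>>>(d_y, d_x, d_l,
                                                               n, d);
  cudaMemcpy(y.data(), d_y, n * sizeof(float), cudaMemcpyDeviceToHost);

  for (int i = 0; i < n; ++i)
    printf("sample %2d: gpu=%f cpu=%f\n", i, y[i], ref[i]);

  cudaFree(d_x);
  cudaFree(d_l);
  cudaFree(d_y);
  return 0;
}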