fix assert_float_eq in blobcmp && issues in Conv kernel (#207)

* fix assert_float_eq in blobcmp
* fix cuda_stream

fix assert_float_eq in blobcmp && issues in Conv kernel (#207)
* fix assert_float_eq in blobcmp
* fix cuda_stream
e3066df5 · kingsmad · chengtbf · 16be8e81 · e3066df5 · e3066df5
4 changed files
--- a/oneflow/core/kernel/convolution_kernel.cu
+++ b/oneflow/core/kernel/convolution_kernel.cu
@@ -102,7 +102,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
        (width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
    int num_kernels = channels * height_col * width_col;
    Im2ColGpuKernel<FloatingPointType>
-        <<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock>>>(
+        <<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock, 0,
+           ctx.device_ctx->cuda_stream()>>>(
            num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h,
            pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col,
            width_col, data_col);
@@ -122,7 +123,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
    // To avoid involving atomic operations, we will launch one kernel per
    // bottom dimension, and then in the kernel add up the top dimensions.
    Col2ImGpuKernel<FloatingPointType>
-        <<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock>>>(
+        <<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock, 0,
+           ctx.device_ctx->cuda_stream()>>>(
            num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
            pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
            height_col, width_col, data_im);

--- a/oneflow/core/kernel/kernel_test_common.cpp
+++ b/oneflow/core/kernel/kernel_test_common.cpp
@@ -62,7 +62,7 @@ class KernelTestCommon<DeviceType::kCPU, FloatingPointType> final {
    size_t dptr_size = lhs->shape().elem_cnt();

    for (size_t i = 0; i < dptr_size; ++i) {
-      ASSERT_NEAR(dptr_lhs[i], dptr_rhs[i], 0.0000001);
+      ASSERT_FLOAT_EQ(dptr_lhs[i], dptr_rhs[i]);
    }
  }


--- a/oneflow/core/kernel/rmsprop_model_update_kernel.cpp
+++ b/oneflow/core/kernel/rmsprop_model_update_kernel.cpp
@@ -57,7 +57,8 @@ class RMSPropMdUpdateKernelUtil<DeviceType::kCPU, FloatingPointType> final {
                          const FloatingPointType alpha) {
    ctx.device_ctx->cpu_stream()->SendWork([=]() {
      for (int64_t i = 0; i < n; ++i) {
-        model[i] -= alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
+        model[i] -=
+            alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
      }
    });
  }

--- a/oneflow/core/kernel/rmsprop_model_update_kernel.cu
+++ b/oneflow/core/kernel/rmsprop_model_update_kernel.cu
@@ -25,7 +25,7 @@ __global__ void UpdateModelGpu(const int64_t n, FloatingPointType* model,
                               const FloatingPointType epsilon,
                               const FloatingPointType alpha) {
  CUDA_1D_KERNEL_LOOP(i, n) {
-    model[i] -=  alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
+    model[i] -= alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
  }
 }