提交 e3066df5 编写于 作者: kingsmad 提交者: chengtbf

fix assert_float_eq in blobcmp && issues in Conv kernel (#207)

* fix assert_float_eq in blobcmp

* fix cuda_stream
上级 16be8e81
......@@ -102,7 +102,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
(width + 2 * pad_w - (dilation_w * (kernel_w - 1) + 1)) / stride_w + 1;
int num_kernels = channels * height_col * width_col;
Im2ColGpuKernel<FloatingPointType>
<<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock>>>(
<<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
num_kernels, data_im, height, width, kernel_h, kernel_w, pad_h,
pad_w, stride_h, stride_w, dilation_h, dilation_w, height_col,
width_col, data_col);
......@@ -122,7 +123,8 @@ class ConvolutionKernelUtil<DeviceType::kGPU, FloatingPointType> final {
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
Col2ImGpuKernel<FloatingPointType>
<<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock>>>(
<<<BlocksNum4ThreadsNum(num_kernels), kCudaThreadsNumPerBlock, 0,
ctx.device_ctx->cuda_stream()>>>(
num_kernels, data_col, height, width, channels, kernel_h, kernel_w,
pad_h, pad_w, stride_h, stride_w, dilation_h, dilation_w,
height_col, width_col, data_im);
......
......@@ -62,7 +62,7 @@ class KernelTestCommon<DeviceType::kCPU, FloatingPointType> final {
size_t dptr_size = lhs->shape().elem_cnt();
for (size_t i = 0; i < dptr_size; ++i) {
ASSERT_NEAR(dptr_lhs[i], dptr_rhs[i], 0.0000001);
ASSERT_FLOAT_EQ(dptr_lhs[i], dptr_rhs[i]);
}
}
......
......@@ -57,7 +57,8 @@ class RMSPropMdUpdateKernelUtil<DeviceType::kCPU, FloatingPointType> final {
const FloatingPointType alpha) {
ctx.device_ctx->cpu_stream()->SendWork([=]() {
for (int64_t i = 0; i < n; ++i) {
model[i] -= alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
model[i] -=
alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
}
});
}
......
......@@ -25,7 +25,7 @@ __global__ void UpdateModelGpu(const int64_t n, FloatingPointType* model,
const FloatingPointType epsilon,
const FloatingPointType alpha) {
CUDA_1D_KERNEL_LOOP(i, n) {
model[i] -= alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
model[i] -= alpha * model_diff[i] / (std::sqrt(mean_square[i]) + epsilon);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册