未验证 提交 67093da3 编写于 作者: W wopeizl 提交者: GitHub

Merge pull request #15122 from wopeizl/windows/fixhuberloss

fix the huber loss compile issue on windows test=develop
...@@ -53,7 +53,7 @@ if (WITH_GPU) ...@@ -53,7 +53,7 @@ if (WITH_GPU)
op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale) op_library(warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale)
endif() endif()
# conv_fusion_op needs cudnn 7 above # conv_fusion_op needs cudnn 7 above
if (NOT ${CUDNN_MAJOR_VERSION} VERSION_LESS 7) if (NOT ${CUDNN_VERSION} VERSION_LESS 7100)
op_library(conv_fusion_op) op_library(conv_fusion_op)
file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n") file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(conv2d_fusion);\n")
endif() endif()
......
...@@ -148,7 +148,7 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel<T> { ...@@ -148,7 +148,7 @@ class DensityPriorBoxOpCUDAKernel : public framework::OpKernel<T> {
// blockx is multiple of 32. // blockx is multiple of 32.
int blockx = std::min( int blockx = std::min(
static_cast<int64_t>(((feature_width * num_priors + 31) >> 5) << 5), static_cast<int64_t>(((feature_width * num_priors + 31) >> 5) << 5),
512L); static_cast<int64_t>(512L));
int gridx = (feature_width * num_priors + blockx - 1) / blockx; int gridx = (feature_width * num_priors + blockx - 1) / blockx;
dim3 threads(blockx, 1); dim3 threads(blockx, 1);
dim3 grids(gridx, feature_height); dim3 grids(gridx, feature_height);
......
...@@ -105,14 +105,16 @@ class HuberLossGradKernel : public framework::OpKernel<T> { ...@@ -105,14 +105,16 @@ class HuberLossGradKernel : public framework::OpKernel<T> {
out0->mutable_data<T>(context.GetPlace()); out0->mutable_data<T>(context.GetPlace());
auto x_grad = EigenVector<T>::Flatten(*out0); auto x_grad = EigenVector<T>::Flatten(*out0);
x_grad.device(place) = x_grad.device(place) =
out_grad * residual.unaryExpr(HuberLossBackward<T>(delta, -1.0)); residual.unaryExpr(HuberLossBackward<T>(delta, -1.0));
x_grad.device(place) = out_grad * x_grad;
} }
if (out1) { if (out1) {
out1->mutable_data<T>(context.GetPlace()); out1->mutable_data<T>(context.GetPlace());
auto y_grad = EigenVector<T>::Flatten(*out1); auto y_grad = EigenVector<T>::Flatten(*out1);
y_grad.device(place) = y_grad.device(place) =
out_grad * residual.unaryExpr(HuberLossBackward<T>(delta, 1.0)); residual.unaryExpr(HuberLossBackward<T>(delta, 1.0));
y_grad.device(place) = out_grad * y_grad;
} }
} }
}; };
......
...@@ -25,7 +25,7 @@ endif(NOT WITH_DISTRIBUTE) ...@@ -25,7 +25,7 @@ endif(NOT WITH_DISTRIBUTE)
if (NOT ${WITH_GPU}) if (NOT ${WITH_GPU})
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
elseif(${CUDNN_MAJOR_VERSION} VERSION_LESS 7) elseif(${CUDNN_VERSION} VERSION_LESS 7100)
LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op) LIST(REMOVE_ITEM TEST_OPS test_conv2d_fusion_op)
endif() endif()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册