未验证 提交 8ffcf596 编写于 作者: F From00 提交者: GitHub

Fix Jetson compilation error in pooling (#40586)

上级 00183a93
@@ -392,7 +392,7 @@ void Pool2dDirectCUDAFunctor<PoolProcess, T>::operator()(
 int nthreads = batch_size * output_channels * output_height * output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-// paddle::platform::ChangeThreadNum(context, &thread_num);
+// backends::gpu::ChangeThreadNum(context, &thread_num);
 thread_num = 512;
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
@@ -460,7 +460,7 @@ class Pool2dFunctor<phi::GPUContext, PoolProcess, T> {
 int nthreads = batch_size * output_channels * output_height * output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
 dim3 threads(thread_num, 1);
@@ -527,7 +527,7 @@ class Pool2dFunctor<phi::GPUContext, PoolProcess, T> {
 int nthreads = batch_size * output_channels * output_height * output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
 dim3 threads(thread_num, 1);
@@ -1293,7 +1293,7 @@ class Pool3dFunctor<phi::GPUContext, PoolProcess, T> {
 output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
 dim3 threads(thread_num, 1);
@@ -1369,7 +1369,7 @@ class Pool3dFunctor<phi::GPUContext, PoolProcess, T> {
 output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
 dim3 threads(thread_num, 1);
@@ -1906,7 +1906,7 @@ class MaxPool2dWithIndexFunctor<phi::GPUContext, T1, T2> {
 int nthreads = batch_size * output_channels * output_height * output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
@@ -2205,7 +2205,7 @@ class MaxPool3dWithIndexFunctor<phi::GPUContext, T1, T2> {
 output_width;
 int thread_num = 1024;
 #ifdef WITH_NV_JETSON
-paddle::platform::ChangeThreadNum(context, &thread_num);
+backends::gpu::ChangeThreadNum(context, &thread_num);
 #endif
 int blocks = (nthreads + thread_num - 1) / thread_num;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册