diff --git a/paddle/fluid/operators/sequence_expand_op.cu b/paddle/fluid/operators/sequence_expand_op.cu index 1bd73426522bcac608e54f979a5c049b2d2fa62b..8a35bc908e8d73f9bdc5f0fbf268fb40e5d228ce 100644 --- a/paddle/fluid/operators/sequence_expand_op.cu +++ b/paddle/fluid/operators/sequence_expand_op.cu @@ -13,7 +13,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #define EIGEN_USE_GPU -#include #include #include "paddle/fluid/operators/sequence_expand_op.h" #include "paddle/fluid/platform/cuda_helper.h" @@ -109,12 +108,10 @@ struct SequenceExpandFunctor { const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand referenced lod*/ LoDTensor* out) { - int x_item_length = 1; - x_item_length = x.numel() / x.dims()[0]; - VLOG(0) << "x_item_length" << x_item_length; - int thread_x = std::max(static_cast(ref_lod.size()), 32); - int thread_y = std::max(1024 / thread_x, 16); - int thread_z = std::min(1024 / thread_x / thread_y, 16); + int x_item_length = x.numel() / x.dims()[0]; + int thread_x = std::min(32, std::max(static_cast(ref_lod.size()), 16)); + int thread_y = 16; + int thread_z = 1024 / thread_x / thread_y; int block_x = static_cast(ref_lod.size()); dim3 block_size(thread_x, thread_y, thread_z); dim3 grid_size(block_x, 1); @@ -133,12 +130,10 @@ struct SequenceExpandGradFunctor { const framework::Vector& x_lod, /*expand source lod*/ const framework::Vector& ref_lod, /*expand based lod*/ LoDTensor* dx) { - int x_item_length = 1; - x_item_length = framework::product(dx->dims()) / dx->dims()[0]; - - int thread_x = std::max(static_cast(ref_lod.size()), 32); - int thread_y = std::max(1024 / thread_x, 16); - int thread_z = std::min(1024 / thread_x / thread_y, 16); + int x_item_length = framework::product(dx->dims()) / dx->dims()[0]; + int thread_x = std::min(32, std::max(static_cast(ref_lod.size()), 16)); + int thread_y = 16; + int thread_z = 1024 / thread_x / thread_y; int block_x = static_cast(ref_lod.size()); dim3 block_size(thread_x, thread_y, thread_z); dim3 grid_size(block_x, 1); diff --git a/paddle/fluid/operators/sequence_expand_op.h b/paddle/fluid/operators/sequence_expand_op.h index c55c3e215abdf91e3fe9b1bdb23823747865839a..d62c387c3eebf9df0ab532f4e891da006f239468 100644 --- a/paddle/fluid/operators/sequence_expand_op.h +++ b/paddle/fluid/operators/sequence_expand_op.h @@ -15,8 +15,6 @@ limitations under the License. */ #pragma once #include // std::iota -#include -#include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/memory/memcpy.h" #include "paddle/fluid/operators/math/math_function.h"