diff --git a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
index f27b66b03f544a763233c8f7c8456b341c0ecdd2..c53ae6d1184703ca06f155c17a853565f490bd3a 100644
--- a/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/gather_nd_op_plugin.h
@@ -14,8 +14,6 @@
 
 #pragma once
 
-#include <thrust/device_vector.h>
-
 #include <algorithm>
 #include <string>
 #include <vector>
diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
index 1cfc9fade7b152033d895e41f3056fae20939b35..0150564e5820616c21218a0699bb4b61009f63a7 100644
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.cu
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <cuda_fp16.h>
+#include <thrust/device_vector.h>
 
 #include <algorithm>
 
@@ -63,9 +64,7 @@ void SplitPlugin::shareData(const SplitPlugin* another) {
   inner_cols_ = another->inner_cols_;
   same_shape_ = another->same_shape_;
   axis_shape_ = another->axis_shape_;
-  d_segment_offsets_ = another->d_segment_offsets_;
   segment_offsets_ = another->segment_offsets_;
-  d_output_ptrs_.resize(another->d_output_ptrs_.size(), nullptr);
 }
 
 int SplitPlugin::initialize() TRT_NOEXCEPT {
@@ -93,9 +92,7 @@
     segment_offsets.push_back(segment_offsets.back() + output_length_[i]);
   }
   axis_shape_ = dims.d[axis_];
-  d_segment_offsets_ = segment_offsets;
   segment_offsets_ = std::move(segment_offsets);
-  d_output_ptrs_.resize(this->getNbOutputs(), nullptr);
   return 0;
 }
 
@@ -133,13 +130,18 @@ int SplitPlugin::enqueue(int batchSize, const void* const* inputs,
                          void* const* outputs, void* workspace,
                          cudaStream_t stream) TRT_NOEXCEPT {
 #endif
+  // These two thrust variables are declared here, not in the .h file,
+  // to avoid a compile error with CUDA 11.6.
+  thrust::device_vector<int> d_segment_offsets = segment_offsets_;
+  thrust::device_vector<float*> d_output_ptrs;
+  d_output_ptrs.resize(segment_offsets_.size(), nullptr);
   const int* d_segment_offsets_ptr =
-      thrust::raw_pointer_cast(&d_segment_offsets_[0]);
+      thrust::raw_pointer_cast(&d_segment_offsets[0]);
   float const* input_ptr = reinterpret_cast<float const*>(inputs[0]);
   float* const* h_odatas = reinterpret_cast<float* const*>(outputs);
-  float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs_[0]);
+  float** output_ptrs = thrust::raw_pointer_cast(&d_output_ptrs[0]);
   PADDLE_ENFORCE_GPU_SUCCESS(cudaMemcpyAsync(
-      output_ptrs, h_odatas, d_output_ptrs_.size() * sizeof(float*),
+      output_ptrs, h_odatas, d_output_ptrs.size() * sizeof(float*),
       cudaMemcpyHostToDevice, stream));
 
   int outer_rows = outer_rows_ * batchSize;
@@ -150,7 +152,7 @@
           std::min((outer_rows_ - 1) / block.z + 1, 65535u));
 
   split_kernel<<<grid, block, 0, stream>>>(
-      d_segment_offsets_.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,
+      segment_offsets_.size(), d_segment_offsets_ptr, input_ptr, output_ptrs,
       inner_cols_, axis_shape_, outer_rows);
   return cudaGetLastError() != cudaSuccess;
 }
diff --git a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
index 49f028493ee87f02d994768c061a72fed3796c36..93dc45215d4ee83e3f6c10605caa21a43b24704f 100644
--- a/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
+++ b/paddle/fluid/inference/tensorrt/plugin/split_op_plugin.h
@@ -14,8 +14,6 @@
 
 #pragma once
 
-#include <thrust/device_vector.h>
-
 #include <string>
 #include <utility>
 #include <vector>
@@ -94,8 +92,6 @@ class SplitPlugin : public PluginTensorRTV2Ext {
   bool same_shape_;
   std::vector<int> output_length_;
   std::vector<int> segment_offsets_;
-  thrust::device_vector<int> d_segment_offsets_;
-  thrust::device_vector<float*> d_output_ptrs_;
 
  private:
   void shareData(const SplitPlugin* another);