diff --git a/paddle/fluid/operators/gru_op.h b/paddle/fluid/operators/gru_op.h index 53f844a6607bd2e98c53b53c23422f6b48e2ced6..3b0d93e54b72910de1429ddf41eb6b0fe9646942 100644 --- a/paddle/fluid/operators/gru_op.h +++ b/paddle/fluid/operators/gru_op.h @@ -34,7 +34,7 @@ inline void ReorderInitState(const DeviceContext& ctx, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index_lod, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, dst, indexed_src); } template @@ -61,7 +61,7 @@ class GRUKernel : public framework::OpKernel { bool is_reverse = context.Attr("is_reverse"); math::LoDTensor2BatchFunctor to_batch; auto& dev_ctx = context.template device_context(); - to_batch(dev_ctx, *input, *batch_gate, true, is_reverse); + to_batch(dev_ctx, *input, batch_gate, true, is_reverse); if (bias) { math::RowwiseAdd add_bias; @@ -113,7 +113,7 @@ class GRUKernel : public framework::OpKernel { math::Batch2LoDTensorFunctor to_seq; batch_hidden->set_lod(batch_gate->lod()); - to_seq(dev_ctx, *batch_hidden, *hidden); + to_seq(dev_ctx, *batch_hidden, hidden); } void Compute(const framework::ExecutionContext& context) const override { @@ -174,7 +174,7 @@ class GRUGradKernel : public framework::OpKernel { bool is_reverse = context.Attr("is_reverse"); batch_hidden_grad.set_lod(batch_hidden->lod()); - to_batch(dev_ctx, *hidden_grad, batch_hidden_grad, false, is_reverse); + to_batch(dev_ctx, *hidden_grad, &batch_hidden_grad, false, is_reverse); math::GRUMetaValue gru_value; gru_value.gate_weight = const_cast(weight_data); @@ -236,7 +236,7 @@ class GRUGradKernel : public framework::OpKernel { input_grad->mutable_data(context.GetPlace()); math::Batch2LoDTensorFunctor to_seq; batch_gate_grad.set_lod(batch_gate->lod()); - to_seq(dev_ctx, batch_gate_grad, *input_grad); + to_seq(dev_ctx, batch_gate_grad, input_grad); } if (bias_grad) { bias_grad->mutable_data(context.GetPlace()); diff --git a/paddle/fluid/operators/lstm_op.h b/paddle/fluid/operators/lstm_op.h index a1ef0eb278dea7205cd8052bbe006b0ae4e3a466..0707aded8c9aa37d6be92373c274b59b7d6b34b6 100644 --- a/paddle/fluid/operators/lstm_op.h +++ b/paddle/fluid/operators/lstm_op.h @@ -33,7 +33,7 @@ inline void ReorderInitState(const DeviceContext& ctx, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index_lod, *dst, indexed_src); + row_shuffle(ctx, src, index_lod, dst, indexed_src); } template @@ -57,7 +57,7 @@ class LSTMKernel : public framework::OpKernel { bool is_reverse = ctx.Attr("is_reverse"); math::LoDTensor2BatchFunctor to_batch; auto& device_ctx = ctx.template device_context(); - to_batch(device_ctx, *input, *batch_gate, true, is_reverse); + to_batch(device_ctx, *input, batch_gate, true, is_reverse); auto in_dims = input->dims(); int frame_size = static_cast(in_dims[1] / 4); @@ -161,11 +161,11 @@ class LSTMKernel : public framework::OpKernel { math::Batch2LoDTensorFunctor to_seq; batch_hidden.set_lod(batch_gate->lod()); // restore the output hidden in LoDTensor from the batch hidden - to_seq(device_ctx, batch_hidden, *hidden_out); + to_seq(device_ctx, batch_hidden, hidden_out); batch_cell.set_lod(batch_gate->lod()); // restore the output cell state in LoDTensor from the batch cell - to_seq(device_ctx, batch_cell, *cell_out); + to_seq(device_ctx, batch_cell, cell_out); } }; @@ -257,7 +257,7 @@ class LSTMGradKernel : public framework::OpKernel { const framework::DDim& dims, framework::LoDTensor& dst) { dst.mutable_data(dims, ctx.GetPlace()); dst.set_lod(batch_gate->lod()); - to_batch(ctx, src, dst, false); + to_batch(ctx, src, &dst, false); }; LoDTensor batch_hidden, batch_hidden_g, batch_cell; @@ -351,7 +351,7 @@ class LSTMGradKernel : public framework::OpKernel { if (in_g) { /* backward data */ in_g->mutable_data(ctx.GetPlace()); - to_seq(device_ctx, batch_gate_g, *in_g); + to_seq(device_ctx, batch_gate_g, in_g); } if (bias && bias_g) { /* backward bias */ diff --git a/paddle/fluid/operators/lstmp_op.h b/paddle/fluid/operators/lstmp_op.h index 172db548960135fbc1841cf58b73894d4f74d838..628936a3105b95577bef080f05b0bd556b514918 100644 --- a/paddle/fluid/operators/lstmp_op.h +++ b/paddle/fluid/operators/lstmp_op.h @@ -40,7 +40,7 @@ inline void ReorderInitState(const DeviceContext& ctx, framework::Tensor* dst, bool indexed_src) { math::CopyMatrixRowsFunctor row_shuffle; dst->mutable_data(src.dims(), ctx.GetPlace()); - row_shuffle(ctx, src, index, *dst, indexed_src); + row_shuffle(ctx, src, index, dst, indexed_src); } template @@ -81,7 +81,7 @@ class LSTMPKernel : public framework::OpKernel { bool is_reverse = ctx.Attr("is_reverse"); math::LoDTensor2BatchFunctor to_batch; auto& device_ctx = ctx.template device_context(); - to_batch(device_ctx, *input, *batch_gate, true, is_reverse); + to_batch(device_ctx, *input, batch_gate, true, is_reverse); auto in_dims = input->dims(); int frame_size = static_cast(in_dims[1] / 4); @@ -208,11 +208,11 @@ class LSTMPKernel : public framework::OpKernel { math::Batch2LoDTensorFunctor to_seq; batch_proj.set_lod(batch_gate->lod()); // restore the output hidden in LoDTensor from the batch hidden - to_seq(device_ctx, batch_proj, *proj_out); + to_seq(device_ctx, batch_proj, proj_out); batch_cell.set_lod(batch_gate->lod()); // restore the output cell state in LoDTensor from the batch cell - to_seq(device_ctx, batch_cell, *cell_out); + to_seq(device_ctx, batch_cell, cell_out); } }; @@ -332,7 +332,7 @@ class LSTMPGradKernel : public framework::OpKernel { const framework::DDim& dims, framework::LoDTensor& dst) { dst.mutable_data(dims, ctx.GetPlace()); dst.set_lod(batch_gate->lod()); - to_batch(ctx, src, dst, false); + to_batch(ctx, src, &dst, false); }; LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell; @@ -471,7 +471,7 @@ class LSTMPGradKernel : public framework::OpKernel { if (in_g) { /* backward data */ in_g->mutable_data(ctx.GetPlace()); - to_seq(device_ctx, batch_gate_g, *in_g); + to_seq(device_ctx, batch_gate_g, in_g); } if (bias && bias_g) { /* backward bias */ diff --git a/paddle/fluid/operators/math/concat_test.cc b/paddle/fluid/operators/math/concat_test.cc index 19d056fa54777eff2881a346da071ff95126173c..f0847aafae78f17eb28745bd224d45ec86497030 100644 --- a/paddle/fluid/operators/math/concat_test.cc +++ b/paddle/fluid/operators/math/concat_test.cc @@ -17,17 +17,14 @@ limitations under the License. */ #include #include "paddle/fluid/framework/tensor_util.h" -using namespace paddle::framework; -using namespace paddle::platform; - template void testConcat() { - Tensor input_a_cpu; - Tensor input_b_cpu; - Tensor out_cpu; - Tensor input_a; - Tensor input_b; - Tensor out; + paddle::framework::Tensor input_a_cpu; + paddle::framework::Tensor input_b_cpu; + paddle::framework::Tensor out_cpu; + paddle::framework::Tensor input_a; + paddle::framework::Tensor input_b; + paddle::framework::Tensor out; DeviceContext* context = new DeviceContext(Place()); // DeviceContext context(Place()); @@ -40,18 +37,18 @@ void testConcat() { * output: * out.shape: [5, 3, 4] */ - auto dim_a = make_ddim({2, 3, 4}); - auto dim_b = make_ddim({3, 3, 4}); - auto dim_out = make_ddim({5, 3, 4}); + auto dim_a = paddle::framework::make_ddim({2, 3, 4}); + auto dim_b = paddle::framework::make_ddim({3, 3, 4}); + auto dim_out = paddle::framework::make_ddim({5, 3, 4}); input_a.mutable_data(dim_a, Place()); input_b.mutable_data(dim_b, Place()); out.mutable_data(dim_out, Place()); if (paddle::platform::is_gpu_place(Place())) { - input_a_cpu.mutable_data(dim_a, CPUPlace()); - input_b_cpu.mutable_data(dim_b, CPUPlace()); - out_cpu.mutable_data(dim_out, CPUPlace()); + input_a_cpu.mutable_data(dim_a, paddle::platform::CPUPlace()); + input_b_cpu.mutable_data(dim_b, paddle::platform::CPUPlace()); + out_cpu.mutable_data(dim_out, paddle::platform::CPUPlace()); } int* a_ptr; @@ -72,11 +69,11 @@ void testConcat() { } if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(input_a_cpu, Place(), &input_a); - TensorCopySync(input_b_cpu, Place(), &input_b); + paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); + paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); } - std::vector input; + std::vector input; input.push_back(input_a); input.push_back(input_b); @@ -89,7 +86,8 @@ void testConcat() { int* out_ptr; if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(out, CPUPlace(), &out_cpu); + paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, + &out_cpu); out_ptr = out_cpu.data(); } else { out_ptr = out.data(); @@ -115,9 +113,9 @@ void testConcat() { * output: * out.shape: [2, 7, 4] */ - dim_a = make_ddim({2, 3, 4}); - dim_b = make_ddim({2, 4, 4}); - dim_out = make_ddim({2, 7, 4}); + dim_a = paddle::framework::make_ddim({2, 3, 4}); + dim_b = paddle::framework::make_ddim({2, 4, 4}); + dim_out = paddle::framework::make_ddim({2, 7, 4}); input_a.Resize(dim_a); input_b.Resize(dim_b); @@ -144,8 +142,8 @@ void testConcat() { } if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(input_a_cpu, Place(), &input_a); - TensorCopySync(input_b_cpu, Place(), &input_b); + paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); + paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); } input.clear(); @@ -159,7 +157,8 @@ void testConcat() { PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(out, CPUPlace(), &out_cpu); + paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, + &out_cpu); out_ptr = out_cpu.data(); } else { out_ptr = out.data(); @@ -187,9 +186,9 @@ void testConcat() { * output: * out.shape: [2, 3, 9] */ - dim_a = make_ddim({2, 3, 4}); - dim_b = make_ddim({2, 3, 5}); - dim_out = make_ddim({2, 3, 9}); + dim_a = paddle::framework::make_ddim({2, 3, 4}); + dim_b = paddle::framework::make_ddim({2, 3, 5}); + dim_out = paddle::framework::make_ddim({2, 3, 9}); input_a.Resize(dim_a); input_b.Resize(dim_b); @@ -216,8 +215,8 @@ void testConcat() { } if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(input_a_cpu, Place(), &input_a); - TensorCopySync(input_b_cpu, Place(), &input_b); + paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); + paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); } input.clear(); @@ -231,7 +230,8 @@ void testConcat() { PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(out, CPUPlace(), &out_cpu); + paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, + &out_cpu); out_ptr = out_cpu.data(); } else { out_ptr = out.data(); @@ -261,9 +261,9 @@ void testConcat() { * output: * out.shape: [2, 6, 4] */ - dim_a = make_ddim({2, 3, 4}); - dim_b = make_ddim({2, 3, 4}); - dim_out = make_ddim({2, 6, 4}); + dim_a = paddle::framework::make_ddim({2, 3, 4}); + dim_b = paddle::framework::make_ddim({2, 3, 4}); + dim_out = paddle::framework::make_ddim({2, 6, 4}); input_a.Resize(dim_a); input_b.Resize(dim_b); @@ -290,8 +290,8 @@ void testConcat() { } if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(input_a_cpu, Place(), &input_a); - TensorCopySync(input_b_cpu, Place(), &input_b); + paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a); + paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b); } input.clear(); @@ -305,7 +305,8 @@ void testConcat() { PADDLE_ENFORCE_EQ(input_b.dims(), dim_b); if (paddle::platform::is_gpu_place(Place())) { - TensorCopySync(out, CPUPlace(), &out_cpu); + paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context, + &out_cpu); out_ptr = out_cpu.data(); } else { out_ptr = out.data(); diff --git a/paddle/fluid/operators/math/cross_entropy.cu b/paddle/fluid/operators/math/cross_entropy.cu index f4935c2813c9f84699f1182df6a9adb613190506..da73f575f375d8a792a82bf6cf4226bab673170d 100644 --- a/paddle/fluid/operators/math/cross_entropy.cu +++ b/paddle/fluid/operators/math/cross_entropy.cu @@ -108,7 +108,9 @@ class CrossEntropyFunctor { if (softLabel) { const T* label_data = labels->data(); - int block = class_num > 512 ? 512 : pow(2, int(std::log2(class_num))); + int block = class_num > 512 + ? 512 + : pow(2, static_cast(std::log2(class_num))); SoftCrossEntropyKernel<<< batch_size, block, block * sizeof(T), diff --git a/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h b/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h index ee7b16da4187e0f7f7839eff5c8753d2eb4f9c6d..0b1034a080f15270e24622b8aaeda7f546aa90e6 100644 --- a/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h +++ b/paddle/fluid/operators/math/detail/lstm_gpu_kernel.h @@ -13,13 +13,13 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include + #include "paddle/fluid/operators/math/detail/activation_functions.h" #include "paddle/fluid/operators/math/lstm_compute.h" #include "paddle/fluid/platform/cuda_helper.h" #include "paddle/fluid/platform/device_context.h" -#include - namespace paddle { namespace operators { namespace math { diff --git a/paddle/fluid/operators/math/sampler.h b/paddle/fluid/operators/math/sampler.h index 9d6a6c28c4304019d0347a30be605e1374c169ee..b82691f269c5d0f267ca98c78646efe9b26f0b34 100644 --- a/paddle/fluid/operators/math/sampler.h +++ b/paddle/fluid/operators/math/sampler.h @@ -13,9 +13,9 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include #include #include -typedef long int64; namespace paddle { namespace operators { namespace math { @@ -27,25 +27,25 @@ namespace math { */ class Sampler { public: - explicit Sampler(int64 range) : range_(range) { + explicit Sampler(int64_t range) : range_(range) { PADDLE_ENFORCE_GT(range, 0); std::random_device r; seed_ = r(); } - explicit Sampler(int64 range, unsigned int seed) + explicit Sampler(int64_t range, unsigned int seed) : range_(range), seed_(seed) { PADDLE_ENFORCE_GT(range, 0); } virtual ~Sampler(); // Sample a single value - virtual int64 Sample() const = 0; + virtual int64_t Sample() const = 0; // The probability that a single call to Sample() returns the given value. - virtual float Probability(int64 value) const = 0; + virtual float Probability(int64_t value) const = 0; - int64 range() { return range_; }; + int64 range() { return range_; } protected: - const int64 range_; + const int64_t range_; unsigned int seed_; }; @@ -56,15 +56,15 @@ class Sampler { */ class UniformSampler : public Sampler { public: - explicit UniformSampler(int64 range); + explicit UniformSampler(int64_t range); - explicit UniformSampler(int64 range, unsigned int seed); + explicit UniformSampler(int64_t range, unsigned int seed); ~UniformSampler() override {} int64 Sample() const override; - float Probability(int64 value) const override; + float Probability(int64_t value) const override; private: const float inv_range_; @@ -79,15 +79,15 @@ class UniformSampler : public Sampler { */ class LogUniformSampler : public Sampler { public: - explicit LogUniformSampler(int64 range); + explicit LogUniformSampler(int64_t range); - explicit LogUniformSampler(int64 range, unsigned int seed); + explicit LogUniformSampler(int64_t range, unsigned int seed); ~LogUniformSampler() override {} int64 Sample() const override; - float Probability(int64 value) const override; + float Probability(int64_t value) const override; private: const float log_range_; @@ -95,6 +95,6 @@ class LogUniformSampler : public Sampler { std::shared_ptr> dist_; }; -} // math +} // namespace math } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/math/selected_rows_functor.cc b/paddle/fluid/operators/math/selected_rows_functor.cc index 5da3d15277cff8b413a116819b17f50061632b5d..a830dc5250a6aea7e622da4046b512d0c7c5d6f9 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cc +++ b/paddle/fluid/operators/math/selected_rows_functor.cc @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" diff --git a/paddle/fluid/operators/math/selected_rows_functor.cu b/paddle/fluid/operators/math/selected_rows_functor.cu index 5d78fd9d213556204d56087128dc84fe6a91e97d..7b31ee8e389b94eeaa04ace52251a23933230d34 100644 --- a/paddle/fluid/operators/math/selected_rows_functor.cu +++ b/paddle/fluid/operators/math/selected_rows_functor.cu @@ -13,6 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. */ #include +#include #include "paddle/fluid/operators/math/math_function.h" #include "paddle/fluid/operators/math/selected_rows_functor.h" diff --git a/paddle/fluid/operators/math/sequence2batch.cc b/paddle/fluid/operators/math/sequence2batch.cc index 8899abff360ea867872d3433722cdb37ef358500..b546b8728217ed6013247555dcd5d7180ddeae74 100644 --- a/paddle/fluid/operators/math/sequence2batch.cc +++ b/paddle/fluid/operators/math/sequence2batch.cc @@ -23,11 +23,11 @@ class CopyMatrixRowsFunctor { public: void operator()(const platform::CPUDeviceContext& context, const framework::Tensor& src, - framework::Vector index_lod, framework::Tensor& dst, + framework::Vector index_lod, framework::Tensor* dst, bool is_src_index) { size_t* index = index_lod.data(); auto src_dims = src.dims(); - auto dst_dims = dst.dims(); + auto dst_dims = dst->dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2UL, "The src must be matrix with rank 2."); PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL, @@ -37,7 +37,7 @@ class CopyMatrixRowsFunctor { auto height = dst_dims[0]; auto width = dst_dims[1]; auto* src_data = src.data(); - auto* dst_data = dst.data(); + auto* dst_data = dst->data(); for (int i = 0; i < height; ++i) { if (is_src_index) { memcpy(dst_data + i * width, src_data + index[i] * width, diff --git a/paddle/fluid/operators/math/sequence2batch.cu b/paddle/fluid/operators/math/sequence2batch.cu index 3185f10d4180437ab5a3f78df8583613edd9ed43..be73adfc0cbe37ed8831b5ad34e66bc95e342e9d 100644 --- a/paddle/fluid/operators/math/sequence2batch.cu +++ b/paddle/fluid/operators/math/sequence2batch.cu @@ -43,10 +43,10 @@ class CopyMatrixRowsFunctor { public: void operator()(const platform::CUDADeviceContext& context, const framework::Tensor& src, - framework::Vector index_lod, framework::Tensor& dst, + framework::Vector index_lod, framework::Tensor* dst, bool is_src_index) { auto src_dims = src.dims(); - auto dst_dims = dst.dims(); + auto dst_dims = dst->dims(); PADDLE_ENFORCE_EQ(src_dims.size(), 2, "The src must be matrix with rank 2."); PADDLE_ENFORCE_EQ(dst_dims.size(), 2, @@ -56,7 +56,7 @@ class CopyMatrixRowsFunctor { auto height = dst_dims[0]; auto width = dst_dims[1]; auto* src_data = src.data(); - auto* dst_data = dst.data(); + auto* dst_data = dst->data(); dim3 threads(128, 8); dim3 grid(8, 1); diff --git a/paddle/fluid/operators/math/sequence2batch.h b/paddle/fluid/operators/math/sequence2batch.h index e78aafd37d1dda91a035f3ed850537e80f188cb2..0abda999a52bcbb94e6503692bd11aff26e849ba 100644 --- a/paddle/fluid/operators/math/sequence2batch.h +++ b/paddle/fluid/operators/math/sequence2batch.h @@ -13,6 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #pragma once +#include +#include #include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/tensor.h" @@ -35,7 +37,7 @@ class CopyMatrixRowsFunctor { // copy the input src to the indexed rows of output dst. // The indexed rows are based on the input index. void operator()(const DeviceContext& context, const framework::Tensor& src, - framework::Vector index_lod, framework::Tensor& dst, + framework::Vector index_lod, framework::Tensor* dst, bool is_src_index); }; @@ -58,10 +60,10 @@ class LoDTensor2BatchFunctor { public: void operator()(const DeviceContext& context, const framework::LoDTensor& lod_tensor, - framework::LoDTensor& batch, bool is_cal_batch_lod, + framework::LoDTensor* batch, bool is_cal_batch_lod, bool is_reverse = false) const { if (!is_cal_batch_lod) { - auto lods = batch.lod(); + auto lods = batch->lod(); PADDLE_ENFORCE_GT(lods.size(), 2UL); PADDLE_ENFORCE_EQ(lods[1].size(), static_cast(lod_tensor.dims()[0])); @@ -141,7 +143,7 @@ class LoDTensor2BatchFunctor { for (size_t i = 0; i < seq_info.size(); ++i) { seq_order[i] = seq_info[i].seq_idx; } - batch.set_lod(batch_lods); + batch->set_lod(batch_lods); CopyMatrixRowsFunctor to_batch; to_batch(context, lod_tensor, batch_lods[1], batch, true); @@ -153,11 +155,11 @@ class Batch2LoDTensorFunctor { public: void operator()(const DeviceContext& context, const framework::LoDTensor& batch, - framework::LoDTensor& lod_tensor) const { + framework::LoDTensor* lod_tensor) const { auto in_lod = batch.lod(); PADDLE_ENFORCE_GT(in_lod.size(), 2UL); PADDLE_ENFORCE_EQ(in_lod[1].size(), - static_cast(lod_tensor.dims()[0])); + static_cast(lod_tensor->dims()[0])); CopyMatrixRowsFunctor to_seq; to_seq(context, batch, in_lod[1], lod_tensor, false); } diff --git a/paddle/fluid/operators/math/sequence_scale.cc b/paddle/fluid/operators/math/sequence_scale.cc index 2c46d4183b5ccd6db909e4142797f97a626c43d5..ee5b22ca855b4fa26e9626aadb84fa9b93b72952 100644 --- a/paddle/fluid/operators/math/sequence_scale.cc +++ b/paddle/fluid/operators/math/sequence_scale.cc @@ -21,15 +21,15 @@ namespace math { template class ScaleLoDTensorFunctor { public: - void operator()(const platform::CPUDeviceContext& context, - framework::LoDTensor& seq, const T* scales) { + void operator()(const platform::CPUDeviceContext& context, const T* scales, + framework::LoDTensor* seq) { const size_t level = 0; - auto lod = seq.lod(); + auto lod = seq->lod(); const size_t num_seq = lod[level].size() - 1; - size_t seq_width = seq.dims()[1]; + size_t seq_width = seq->dims()[1]; framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - T* seq_data = seq.mutable_data(context.GetPlace()); + T* seq_data = seq->mutable_data(context.GetPlace()); for (size_t i = 0; i < num_seq; ++i) { for (size_t j = lod[level][i] * seq_width; j < lod[level][i + 1] * seq_width; ++j) { diff --git a/paddle/fluid/operators/math/sequence_scale.cu b/paddle/fluid/operators/math/sequence_scale.cu index 74085153c62354771f6126b58746229b5564f2d0..430bf13c3f8d627f2b4cc24b005f2be5a66cefac 100644 --- a/paddle/fluid/operators/math/sequence_scale.cu +++ b/paddle/fluid/operators/math/sequence_scale.cu @@ -35,14 +35,14 @@ __global__ void SequenceScaleKernel(T* seq, size_t* lod, const T* scales, template class ScaleLoDTensorFunctor { public: - void operator()(const platform::CUDADeviceContext& context, - framework::LoDTensor& seq, const T* scales) { + void operator()(const platform::CUDADeviceContext& context, const T* scales, + framework::LoDTensor* seq) { const size_t level = 0; - auto lod = seq.lod(); + auto lod = seq->lod(); const size_t num_seq = lod[level].size() - 1; - const size_t seq_width = seq.numel() / seq.dims()[0]; + const size_t seq_width = seq->numel() / seq->dims()[0]; framework::LoD abs_offset_lod = framework::ToAbsOffset(lod); - T* seq_data = seq.mutable_data(context.GetPlace()); + T* seq_data = seq->mutable_data(context.GetPlace()); SequenceScaleKernel<<< num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>( diff --git a/paddle/fluid/operators/math/sequence_scale.h b/paddle/fluid/operators/math/sequence_scale.h index 6cdcbe21cbf82881679d90de470f342f75b3e2f3..202243985c125cd518a27477eb370bf1a325fe16 100644 --- a/paddle/fluid/operators/math/sequence_scale.h +++ b/paddle/fluid/operators/math/sequence_scale.h @@ -46,8 +46,8 @@ namespace math { template class ScaleLoDTensorFunctor { public: - void operator()(const DeviceContext& context, framework::LoDTensor& seq, - const T* scales); + void operator()(const DeviceContext& context, const T* scales, + framework::LoDTensor* seq); }; } // namespace math diff --git a/paddle/fluid/operators/nccl/nccl_gpu_common.h b/paddle/fluid/operators/nccl/nccl_gpu_common.h index 113f93e346681e568524f9fb6a0ab9a56de8569e..558ff4cc09603eebbcd95a234ff1aa63ada7fbb2 100644 --- a/paddle/fluid/operators/nccl/nccl_gpu_common.h +++ b/paddle/fluid/operators/nccl/nccl_gpu_common.h @@ -15,9 +15,9 @@ limitations under the License. */ #pragma once #include -#include +#include // NOLINT #include -#include +#include // NOLINT #include #include #include diff --git a/paddle/fluid/operators/warpctc_op.h b/paddle/fluid/operators/warpctc_op.h index afbfe69973830bde93ec0af8d1c844580a786663..85131d002595f7681e4bec4135e28fe49cf842fb 100644 --- a/paddle/fluid/operators/warpctc_op.h +++ b/paddle/fluid/operators/warpctc_op.h @@ -222,8 +222,8 @@ class WarpCTCGradKernel : public framework::OpKernel { const T* loss_grad_data = loss_grad->data(); math::ScaleLoDTensorFunctor()( - ctx.template device_context(), *logits_grad, - loss_grad_data); + ctx.template device_context(), loss_grad_data, + logits_grad); } };