Unverified  Commit e7353596  authored by Abhinav Arora, committed by GitHub

Fix more CPPlint issues in fluid/operators/math (#10249)

* Fix CPPLint errors

* Fix CPPLint errors in sequence2batch

* Fix compilation

* Fix LSTM op and GRU op

* Fix LSTMP op

* Fix more cpplint errors in operators/math

* Address Code review feedback
Parent 6e0b47b3
@@ -34,7 +34,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
                              framework::Tensor* dst, bool indexed_src) {
   math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
   dst->mutable_data<T>(src.dims(), ctx.GetPlace());
-  row_shuffle(ctx, src, index_lod, *dst, indexed_src);
+  row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }
 template <typename DeviceContext, typename T>
@@ -61,7 +61,7 @@ class GRUKernel : public framework::OpKernel<T> {
     bool is_reverse = context.Attr<bool>("is_reverse");
     math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
     auto& dev_ctx = context.template device_context<DeviceContext>();
-    to_batch(dev_ctx, *input, *batch_gate, true, is_reverse);
+    to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
     if (bias) {
       math::RowwiseAdd<DeviceContext, T> add_bias;
@@ -113,7 +113,7 @@ class GRUKernel : public framework::OpKernel<T> {
     math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
     batch_hidden->set_lod(batch_gate->lod());
-    to_seq(dev_ctx, *batch_hidden, *hidden);
+    to_seq(dev_ctx, *batch_hidden, hidden);
   }
   void Compute(const framework::ExecutionContext& context) const override {
@@ -174,7 +174,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
     bool is_reverse = context.Attr<bool>("is_reverse");
     batch_hidden_grad.set_lod(batch_hidden->lod());
-    to_batch(dev_ctx, *hidden_grad, batch_hidden_grad, false, is_reverse);
+    to_batch(dev_ctx, *hidden_grad, &batch_hidden_grad, false, is_reverse);
     math::GRUMetaValue<T> gru_value;
     gru_value.gate_weight = const_cast<T*>(weight_data);
@@ -236,7 +236,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
       input_grad->mutable_data<T>(context.GetPlace());
       math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
       batch_gate_grad.set_lod(batch_gate->lod());
-      to_seq(dev_ctx, batch_gate_grad, *input_grad);
+      to_seq(dev_ctx, batch_gate_grad, input_grad);
     }
     if (bias_grad) {
       bias_grad->mutable_data<T>(context.GetPlace());
...
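Reviewer note: the recurring change in this file (and in the LSTM/LSTMP kernels below) is cpplint's runtime/references rule: mutable reference parameters become pointers, so call sites now pass dst or &batch_hidden_grad instead of *dst or batch_hidden_grad. A minimal, self-contained sketch of the rule with hypothetical names, not Paddle code:

#include <vector>

// Before (flagged by cpplint runtime/references):
//   void Fill(std::vector<int>& dst, int value);
// After: the output is a pointer, so mutation is visible at the call site.
void Fill(std::vector<int>* dst, int value) {
  dst->assign(4, value);
}

int main() {
  std::vector<int> v;
  Fill(&v, 7);  // the explicit & marks v as an in/out argument
  return (v.size() == 4 && v[0] == 7) ? 0 : 1;
}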
@@ -33,7 +33,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
                              framework::Tensor* dst, bool indexed_src) {
   math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
   dst->mutable_data<T>(src.dims(), ctx.GetPlace());
-  row_shuffle(ctx, src, index_lod, *dst, indexed_src);
+  row_shuffle(ctx, src, index_lod, dst, indexed_src);
 }
 template <typename DeviceContext, typename T>
@@ -57,7 +57,7 @@ class LSTMKernel : public framework::OpKernel<T> {
     bool is_reverse = ctx.Attr<bool>("is_reverse");
     math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
     auto& device_ctx = ctx.template device_context<DeviceContext>();
-    to_batch(device_ctx, *input, *batch_gate, true, is_reverse);
+    to_batch(device_ctx, *input, batch_gate, true, is_reverse);
     auto in_dims = input->dims();
     int frame_size = static_cast<int>(in_dims[1] / 4);
@@ -161,11 +161,11 @@ class LSTMKernel : public framework::OpKernel<T> {
     math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
     batch_hidden.set_lod(batch_gate->lod());
     // restore the output hidden in LoDTensor from the batch hidden
-    to_seq(device_ctx, batch_hidden, *hidden_out);
+    to_seq(device_ctx, batch_hidden, hidden_out);
     batch_cell.set_lod(batch_gate->lod());
     // restore the output cell state in LoDTensor from the batch cell
-    to_seq(device_ctx, batch_cell, *cell_out);
+    to_seq(device_ctx, batch_cell, cell_out);
   }
 };
@@ -257,7 +257,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
                      const framework::DDim& dims, framework::LoDTensor& dst) {
     dst.mutable_data<T>(dims, ctx.GetPlace());
     dst.set_lod(batch_gate->lod());
-    to_batch(ctx, src, dst, false);
+    to_batch(ctx, src, &dst, false);
   };
   LoDTensor batch_hidden, batch_hidden_g, batch_cell;
@@ -351,7 +351,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
     if (in_g) {
       /* backward data */
       in_g->mutable_data<T>(ctx.GetPlace());
-      to_seq(device_ctx, batch_gate_g, *in_g);
+      to_seq(device_ctx, batch_gate_g, in_g);
     }
     if (bias && bias_g) {
       /* backward bias */
...
@@ -40,7 +40,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
                              framework::Tensor* dst, bool indexed_src) {
   math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
   dst->mutable_data<T>(src.dims(), ctx.GetPlace());
-  row_shuffle(ctx, src, index, *dst, indexed_src);
+  row_shuffle(ctx, src, index, dst, indexed_src);
 }
 template <typename DeviceContext, typename T>
@@ -81,7 +81,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
     bool is_reverse = ctx.Attr<bool>("is_reverse");
     math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
     auto& device_ctx = ctx.template device_context<DeviceContext>();
-    to_batch(device_ctx, *input, *batch_gate, true, is_reverse);
+    to_batch(device_ctx, *input, batch_gate, true, is_reverse);
     auto in_dims = input->dims();
     int frame_size = static_cast<int>(in_dims[1] / 4);
@@ -208,11 +208,11 @@ class LSTMPKernel : public framework::OpKernel<T> {
     math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
     batch_proj.set_lod(batch_gate->lod());
     // restore the output hidden in LoDTensor from the batch hidden
-    to_seq(device_ctx, batch_proj, *proj_out);
+    to_seq(device_ctx, batch_proj, proj_out);
     batch_cell.set_lod(batch_gate->lod());
     // restore the output cell state in LoDTensor from the batch cell
-    to_seq(device_ctx, batch_cell, *cell_out);
+    to_seq(device_ctx, batch_cell, cell_out);
   }
 };
@@ -332,7 +332,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
                      const framework::DDim& dims, framework::LoDTensor& dst) {
     dst.mutable_data<T>(dims, ctx.GetPlace());
     dst.set_lod(batch_gate->lod());
-    to_batch(ctx, src, dst, false);
+    to_batch(ctx, src, &dst, false);
   };
   LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell;
@@ -471,7 +471,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
     if (in_g) {
       /* backward data */
       in_g->mutable_data<T>(ctx.GetPlace());
-      to_seq(device_ctx, batch_gate_g, *in_g);
+      to_seq(device_ctx, batch_gate_g, in_g);
     }
     if (bias && bias_g) {
       /* backward bias */
...
@@ -17,17 +17,14 @@ limitations under the License. */
 #include <vector>
 #include "paddle/fluid/framework/tensor_util.h"
-using namespace paddle::framework;
-using namespace paddle::platform;
 template <typename DeviceContext, typename Place>
 void testConcat() {
-  Tensor input_a_cpu;
-  Tensor input_b_cpu;
-  Tensor out_cpu;
-  Tensor input_a;
-  Tensor input_b;
-  Tensor out;
+  paddle::framework::Tensor input_a_cpu;
+  paddle::framework::Tensor input_b_cpu;
+  paddle::framework::Tensor out_cpu;
+  paddle::framework::Tensor input_a;
+  paddle::framework::Tensor input_b;
+  paddle::framework::Tensor out;
   DeviceContext* context = new DeviceContext(Place());
   // DeviceContext context(Place());
@@ -40,18 +37,18 @@ void testConcat() {
    * output:
    *   out.shape: [5, 3, 4]
    */
-  auto dim_a = make_ddim({2, 3, 4});
-  auto dim_b = make_ddim({3, 3, 4});
-  auto dim_out = make_ddim({5, 3, 4});
+  auto dim_a = paddle::framework::make_ddim({2, 3, 4});
+  auto dim_b = paddle::framework::make_ddim({3, 3, 4});
+  auto dim_out = paddle::framework::make_ddim({5, 3, 4});
   input_a.mutable_data<int>(dim_a, Place());
   input_b.mutable_data<int>(dim_b, Place());
   out.mutable_data<int>(dim_out, Place());
   if (paddle::platform::is_gpu_place(Place())) {
-    input_a_cpu.mutable_data<int>(dim_a, CPUPlace());
-    input_b_cpu.mutable_data<int>(dim_b, CPUPlace());
-    out_cpu.mutable_data<int>(dim_out, CPUPlace());
+    input_a_cpu.mutable_data<int>(dim_a, paddle::platform::CPUPlace());
+    input_b_cpu.mutable_data<int>(dim_b, paddle::platform::CPUPlace());
+    out_cpu.mutable_data<int>(dim_out, paddle::platform::CPUPlace());
   }
   int* a_ptr;
@@ -72,11 +69,11 @@ void testConcat() {
   }
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(input_a_cpu, Place(), &input_a);
-    TensorCopySync(input_b_cpu, Place(), &input_b);
+    paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
+    paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
   }
-  std::vector<Tensor> input;
+  std::vector<paddle::framework::Tensor> input;
   input.push_back(input_a);
   input.push_back(input_b);
@@ -89,7 +86,8 @@ void testConcat() {
   int* out_ptr;
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(out, CPUPlace(), &out_cpu);
+    paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
+                                  &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -115,9 +113,9 @@ void testConcat() {
    * output:
    *   out.shape: [2, 7, 4]
    */
-  dim_a = make_ddim({2, 3, 4});
-  dim_b = make_ddim({2, 4, 4});
-  dim_out = make_ddim({2, 7, 4});
+  dim_a = paddle::framework::make_ddim({2, 3, 4});
+  dim_b = paddle::framework::make_ddim({2, 4, 4});
+  dim_out = paddle::framework::make_ddim({2, 7, 4});
   input_a.Resize(dim_a);
   input_b.Resize(dim_b);
@@ -144,8 +142,8 @@ void testConcat() {
   }
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(input_a_cpu, Place(), &input_a);
-    TensorCopySync(input_b_cpu, Place(), &input_b);
+    paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
+    paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
   }
   input.clear();
@@ -159,7 +157,8 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(out, CPUPlace(), &out_cpu);
+    paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
+                                  &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -187,9 +186,9 @@ void testConcat() {
    * output:
    *   out.shape: [2, 3, 9]
    */
-  dim_a = make_ddim({2, 3, 4});
-  dim_b = make_ddim({2, 3, 5});
-  dim_out = make_ddim({2, 3, 9});
+  dim_a = paddle::framework::make_ddim({2, 3, 4});
+  dim_b = paddle::framework::make_ddim({2, 3, 5});
+  dim_out = paddle::framework::make_ddim({2, 3, 9});
   input_a.Resize(dim_a);
   input_b.Resize(dim_b);
@@ -216,8 +215,8 @@ void testConcat() {
   }
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(input_a_cpu, Place(), &input_a);
-    TensorCopySync(input_b_cpu, Place(), &input_b);
+    paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
+    paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
   }
   input.clear();
@@ -231,7 +230,8 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(out, CPUPlace(), &out_cpu);
+    paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
+                                  &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
@@ -261,9 +261,9 @@ void testConcat() {
    * output:
    *   out.shape: [2, 6, 4]
    */
-  dim_a = make_ddim({2, 3, 4});
-  dim_b = make_ddim({2, 3, 4});
-  dim_out = make_ddim({2, 6, 4});
+  dim_a = paddle::framework::make_ddim({2, 3, 4});
+  dim_b = paddle::framework::make_ddim({2, 3, 4});
+  dim_out = paddle::framework::make_ddim({2, 6, 4});
   input_a.Resize(dim_a);
   input_b.Resize(dim_b);
@@ -290,8 +290,8 @@ void testConcat() {
   }
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(input_a_cpu, Place(), &input_a);
-    TensorCopySync(input_b_cpu, Place(), &input_b);
+    paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
+    paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
   }
   input.clear();
@@ -305,7 +305,8 @@ void testConcat() {
   PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
   if (paddle::platform::is_gpu_place(Place())) {
-    TensorCopySync(out, CPUPlace(), &out_cpu);
+    paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
+                                  &out_cpu);
     out_ptr = out_cpu.data<int>();
   } else {
     out_ptr = out.data<int>();
...
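Reviewer note: the concat test drops its two file-scope using-namespace directives (cpplint build/namespaces) and spells out paddle::framework and paddle::platform at each use; the GPU branches also route copies through the fully qualified TensorCopy with the device context, as shown in the hunks above. A generic, stand-alone sketch of the namespace rule with hypothetical names, not tied to Paddle:

#include <cstdio>
#include <vector>

// Before (flagged by cpplint build/namespaces):
//   using namespace std;
//   vector<int> v;
// After: qualify names at the point of use, the same way the test now writes
// paddle::framework::Tensor and paddle::platform::CPUPlace in full.
int main() {
  std::vector<int> v = {1, 2, 3};
  std::printf("%zu\n", v.size());
  return 0;
}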
@@ -108,7 +108,9 @@ class CrossEntropyFunctor<platform::CUDADeviceContext, T> {
     if (softLabel) {
       const T* label_data = labels->data<T>();
-      int block = class_num > 512 ? 512 : pow(2, int(std::log2(class_num)));
+      int block = class_num > 512
+                      ? 512
+                      : pow(2, static_cast<int>(std::log2(class_num)));
       SoftCrossEntropyKernel<T><<<
           batch_size, block, block * sizeof(T),
...
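Reviewer note: the only change here is replacing the C-style int(...) cast with static_cast<int>(...) (cpplint readability/casting) and reflowing the ternary; the block-size formula itself is untouched. Written as plain C++ with a hypothetical helper name, it clamps to 512 threads and otherwise rounds class_num down to a power of two:

#include <cmath>
#include <cstdio>

// Same expression as the kernel-launch block size above, with the
// old-style int(...) cast replaced by static_cast<int>(...).
int BlockSize(int class_num) {
  return class_num > 512
             ? 512
             : static_cast<int>(
                   std::pow(2, static_cast<int>(std::log2(class_num))));
}

int main() {
  std::printf("%d %d %d\n", BlockSize(10), BlockSize(512), BlockSize(2000));
  // prints: 8 512 512
  return 0;
}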
@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <type_traits>
 #include "paddle/fluid/operators/math/detail/activation_functions.h"
 #include "paddle/fluid/operators/math/lstm_compute.h"
 #include "paddle/fluid/platform/cuda_helper.h"
 #include "paddle/fluid/platform/device_context.h"
-#include <type_traits>
 namespace paddle {
 namespace operators {
 namespace math {
...
@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <cstdint>
 #include <memory>
 #include <random>
-typedef long int64;
 namespace paddle {
 namespace operators {
 namespace math {
@@ -27,25 +27,25 @@ namespace math {
  */
 class Sampler {
  public:
-  explicit Sampler(int64 range) : range_(range) {
+  explicit Sampler(int64_t range) : range_(range) {
     PADDLE_ENFORCE_GT(range, 0);
     std::random_device r;
     seed_ = r();
   }
-  explicit Sampler(int64 range, unsigned int seed)
+  explicit Sampler(int64_t range, unsigned int seed)
       : range_(range), seed_(seed) {
     PADDLE_ENFORCE_GT(range, 0);
   }
   virtual ~Sampler();
   // Sample a single value
-  virtual int64 Sample() const = 0;
+  virtual int64_t Sample() const = 0;
   // The probability that a single call to Sample() returns the given value.
-  virtual float Probability(int64 value) const = 0;
+  virtual float Probability(int64_t value) const = 0;
-  int64 range() { return range_; };
+  int64 range() { return range_; }
  protected:
-  const int64 range_;
+  const int64_t range_;
   unsigned int seed_;
 };
@@ -56,15 +56,15 @@ class Sampler {
  */
 class UniformSampler : public Sampler {
  public:
-  explicit UniformSampler(int64 range);
-  explicit UniformSampler(int64 range, unsigned int seed);
+  explicit UniformSampler(int64_t range);
+  explicit UniformSampler(int64_t range, unsigned int seed);
   ~UniformSampler() override {}
   int64 Sample() const override;
-  float Probability(int64 value) const override;
+  float Probability(int64_t value) const override;
  private:
   const float inv_range_;
@@ -79,15 +79,15 @@ class UniformSampler : public Sampler {
  */
 class LogUniformSampler : public Sampler {
  public:
-  explicit LogUniformSampler(int64 range);
-  explicit LogUniformSampler(int64 range, unsigned int seed);
+  explicit LogUniformSampler(int64_t range);
+  explicit LogUniformSampler(int64_t range, unsigned int seed);
   ~LogUniformSampler() override {}
   int64 Sample() const override;
-  float Probability(int64 value) const override;
+  float Probability(int64_t value) const override;
  private:
   const float log_range_;
@@ -95,6 +95,6 @@ class LogUniformSampler : public Sampler {
   std::shared_ptr<std::uniform_real_distribution<>> dist_;
 };
-}  // math
+}  // namespace math
 }  // namespace operators
 }  // namespace paddle
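Reviewer note: sampler.h drops its hand-rolled typedef long int64 in favour of int64_t from <cstdint>, which has a guaranteed width, whereas plain long is only 32 bits on LLP64 platforms (e.g. 64-bit Windows) and on 32-bit targets. A tiny stand-alone illustration, not Paddle code:

#include <cstdint>
#include <cstdio>

// int64_t is 64 bits everywhere; the shifted value below would overflow a
// 32-bit long.
int main() {
  int64_t range = INT64_C(1) << 40;
  std::printf("%lld\n", static_cast<long long>(range));
  return 0;
}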
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include <set>
+#include <vector>
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #include <set>
+#include <vector>
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
...
@@ -23,11 +23,11 @@ class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
  public:
   void operator()(const platform::CPUDeviceContext& context,
                   const framework::Tensor& src,
-                  framework::Vector<size_t> index_lod, framework::Tensor& dst,
+                  framework::Vector<size_t> index_lod, framework::Tensor* dst,
                   bool is_src_index) {
     size_t* index = index_lod.data();
     auto src_dims = src.dims();
-    auto dst_dims = dst.dims();
+    auto dst_dims = dst->dims();
     PADDLE_ENFORCE_EQ(src_dims.size(), 2UL,
                       "The src must be matrix with rank 2.");
     PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL,
@@ -37,7 +37,7 @@ class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
     auto height = dst_dims[0];
     auto width = dst_dims[1];
     auto* src_data = src.data<T>();
-    auto* dst_data = dst.data<T>();
+    auto* dst_data = dst->data<T>();
     for (int i = 0; i < height; ++i) {
       if (is_src_index) {
         memcpy(dst_data + i * width, src_data + index[i] * width,
...
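Reviewer note: for context, the loop above is a row gather: with is_src_index set, dst row i is filled from src row index[i] (the else branch, not shown in this hunk, copies in the opposite direction per the header comment "copy the input src to the indexed rows of output dst"). A CPU-only sketch of the gather with hypothetical names, using a pointer output in the same style as the new signature:

#include <cstddef>
#include <cstring>
#include <vector>

// dst row i is copied from src row index[i]; width is the row length.
void GatherRows(const std::vector<float>& src,
                const std::vector<std::size_t>& index, std::size_t width,
                std::vector<float>* dst) {
  dst->resize(index.size() * width);
  for (std::size_t i = 0; i < index.size(); ++i) {
    std::memcpy(dst->data() + i * width, src.data() + index[i] * width,
                width * sizeof(float));
  }
}

int main() {
  std::vector<float> src = {1, 1, 2, 2, 3, 3};  // 3 rows, width 2
  std::vector<float> dst;
  GatherRows(src, {2, 0, 1}, 2, &dst);
  // dst rows are now src rows 2, 0, 1: {3, 3, 1, 1, 2, 2}
  return dst[0] == 3.f ? 0 : 1;
}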
@@ -43,10 +43,10 @@ class CopyMatrixRowsFunctor<platform::CUDADeviceContext, T> {
  public:
   void operator()(const platform::CUDADeviceContext& context,
                   const framework::Tensor& src,
-                  framework::Vector<size_t> index_lod, framework::Tensor& dst,
+                  framework::Vector<size_t> index_lod, framework::Tensor* dst,
                   bool is_src_index) {
     auto src_dims = src.dims();
-    auto dst_dims = dst.dims();
+    auto dst_dims = dst->dims();
     PADDLE_ENFORCE_EQ(src_dims.size(), 2,
                       "The src must be matrix with rank 2.");
     PADDLE_ENFORCE_EQ(dst_dims.size(), 2,
@@ -56,7 +56,7 @@ class CopyMatrixRowsFunctor<platform::CUDADeviceContext, T> {
     auto height = dst_dims[0];
     auto width = dst_dims[1];
     auto* src_data = src.data<T>();
-    auto* dst_data = dst.data<T>();
+    auto* dst_data = dst->data<T>();
     dim3 threads(128, 8);
     dim3 grid(8, 1);
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 #pragma once
+#include <algorithm>
+#include <vector>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor.h"
@@ -35,7 +37,7 @@ class CopyMatrixRowsFunctor {
   // copy the input src to the indexed rows of output dst.
   // The indexed rows are based on the input index.
   void operator()(const DeviceContext& context, const framework::Tensor& src,
-                  framework::Vector<size_t> index_lod, framework::Tensor& dst,
+                  framework::Vector<size_t> index_lod, framework::Tensor* dst,
                   bool is_src_index);
 };
@@ -58,10 +60,10 @@ class LoDTensor2BatchFunctor {
  public:
   void operator()(const DeviceContext& context,
                   const framework::LoDTensor& lod_tensor,
-                  framework::LoDTensor& batch, bool is_cal_batch_lod,
+                  framework::LoDTensor* batch, bool is_cal_batch_lod,
                   bool is_reverse = false) const {
     if (!is_cal_batch_lod) {
-      auto lods = batch.lod();
+      auto lods = batch->lod();
       PADDLE_ENFORCE_GT(lods.size(), 2UL);
       PADDLE_ENFORCE_EQ(lods[1].size(),
                         static_cast<size_t>(lod_tensor.dims()[0]));
@@ -141,7 +143,7 @@ class LoDTensor2BatchFunctor {
     for (size_t i = 0; i < seq_info.size(); ++i) {
       seq_order[i] = seq_info[i].seq_idx;
     }
-    batch.set_lod(batch_lods);
+    batch->set_lod(batch_lods);
     CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
     to_batch(context, lod_tensor, batch_lods[1], batch, true);
@@ -153,11 +155,11 @@ class Batch2LoDTensorFunctor {
  public:
   void operator()(const DeviceContext& context,
                   const framework::LoDTensor& batch,
-                  framework::LoDTensor& lod_tensor) const {
+                  framework::LoDTensor* lod_tensor) const {
     auto in_lod = batch.lod();
     PADDLE_ENFORCE_GT(in_lod.size(), 2UL);
     PADDLE_ENFORCE_EQ(in_lod[1].size(),
-                      static_cast<size_t>(lod_tensor.dims()[0]));
+                      static_cast<size_t>(lod_tensor->dims()[0]));
     CopyMatrixRowsFunctor<DeviceContext, T> to_seq;
     to_seq(context, batch, in_lod[1], lod_tensor, false);
   }
...
@@ -21,15 +21,15 @@ namespace math {
 template <typename T>
 class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
  public:
-  void operator()(const platform::CPUDeviceContext& context,
-                  framework::LoDTensor& seq, const T* scales) {
+  void operator()(const platform::CPUDeviceContext& context, const T* scales,
+                  framework::LoDTensor* seq) {
     const size_t level = 0;
-    auto lod = seq.lod();
+    auto lod = seq->lod();
     const size_t num_seq = lod[level].size() - 1;
-    size_t seq_width = seq.dims()[1];
+    size_t seq_width = seq->dims()[1];
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+    T* seq_data = seq->mutable_data<T>(context.GetPlace());
     for (size_t i = 0; i < num_seq; ++i) {
       for (size_t j = lod[level][i] * seq_width;
            j < lod[level][i + 1] * seq_width; ++j) {
...
@@ -35,14 +35,14 @@ __global__ void SequenceScaleKernel(T* seq, size_t* lod, const T* scales,
 template <typename T>
 class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
  public:
-  void operator()(const platform::CUDADeviceContext& context,
-                  framework::LoDTensor& seq, const T* scales) {
+  void operator()(const platform::CUDADeviceContext& context, const T* scales,
+                  framework::LoDTensor* seq) {
     const size_t level = 0;
-    auto lod = seq.lod();
+    auto lod = seq->lod();
     const size_t num_seq = lod[level].size() - 1;
-    const size_t seq_width = seq.numel() / seq.dims()[0];
+    const size_t seq_width = seq->numel() / seq->dims()[0];
     framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
-    T* seq_data = seq.mutable_data<T>(context.GetPlace());
+    T* seq_data = seq->mutable_data<T>(context.GetPlace());
     SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS><<<
         num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
...
@@ -46,8 +46,8 @@ namespace math {
 template <typename DeviceContext, typename T>
 class ScaleLoDTensorFunctor {
  public:
-  void operator()(const DeviceContext& context, framework::LoDTensor& seq,
-                  const T* scales);
+  void operator()(const DeviceContext& context, const T* scales,
+                  framework::LoDTensor* seq);
 };
 }  // namespace math
...
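Reviewer note: besides switching the output to a pointer, ScaleLoDTensorFunctor's arguments are reordered so the input (scales) comes before the output (seq), following the Google style convention of inputs first, outputs last; the warpctc call site further below is updated to match. A generic sketch of that convention with hypothetical names, not Paddle code:

#include <cstddef>
#include <vector>

// Inputs first (scales), output last and by pointer (seq).
void Scale(const std::vector<float>& scales, std::vector<float>* seq) {
  for (std::size_t i = 0; i < seq->size(); ++i) {
    (*seq)[i] *= scales[i % scales.size()];
  }
}

int main() {
  std::vector<float> seq = {1.f, 2.f, 3.f, 4.f};
  Scale({2.f}, &seq);  // call site mirrors the new (scales, &seq) order
  return seq[3] == 8.f ? 0 : 1;
}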
@@ -15,9 +15,9 @@ limitations under the License. */
 #pragma once
 #include <algorithm>
-#include <condition_variable>
+#include <condition_variable>  // NOLINT
 #include <memory>
-#include <mutex>
+#include <mutex>  // NOLINT
 #include <string>
 #include <unordered_map>
 #include <vector>
...
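Reviewer note: cpplint's build/c++11 check flags <mutex> and <condition_variable> as unapproved headers; the trailing // NOLINT comments keep the includes while silencing that warning. A minimal stand-alone example of the same includes in use, with hypothetical variable names:

#include <condition_variable>  // NOLINT
#include <mutex>               // NOLINT
#include <thread>              // NOLINT

int main() {
  std::mutex m;
  std::condition_variable cv;
  bool ready = false;
  std::thread worker([&] {
    std::lock_guard<std::mutex> guard(m);
    ready = true;
    cv.notify_one();  // wake the waiter once ready is set
  });
  std::unique_lock<std::mutex> lock(m);
  cv.wait(lock, [&] { return ready; });
  worker.join();
  return 0;
}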
@@ -222,8 +222,8 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
       const T* loss_grad_data = loss_grad->data<T>();
       math::ScaleLoDTensorFunctor<DeviceContext, T>()(
-          ctx.template device_context<DeviceContext>(), *logits_grad,
-          loss_grad_data);
+          ctx.template device_context<DeviceContext>(), loss_grad_data,
+          logits_grad);
     }
   };
...