Unverified commit e7353596, authored by Abhinav Arora, committed by GitHub

Fix more CPPlint issues in fluid/operators/math (#10249)

* Fix CPPLint errors

* Fix CPPLint errors in sequence2batch

* Fix compilation

* Fix LSTM op and GRU op

* Fix LSTMP op

* Fix more cpplint errors in operators/math

* Address Code review feedback
Parent 6e0b47b3
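The bulk of this patch applies cpplint's runtime/references rule: output arguments are passed by pointer rather than by non-const reference, so the mutation is visible at every call site. Below is a minimal standalone sketch of that rule, with hypothetical names (Scale, src, dst) that are not part of Paddle.

```cpp
#include <vector>

// Flagged by cpplint (runtime/references): non-const reference output.
// void Scale(const std::vector<int>& src, std::vector<int>& dst);

// Preferred form: the output is an explicit pointer.
void Scale(const std::vector<int>& src, std::vector<int>* dst) {
  dst->clear();
  for (int v : src) dst->push_back(v * 2);
}

int main() {
  std::vector<int> src = {1, 2, 3};
  std::vector<int> dst;
  Scale(src, &dst);  // the '&' at the call site signals that dst is modified
  return 0;
}
```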
......@@ -34,7 +34,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index_lod, *dst, indexed_src);
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
......@@ -61,7 +61,7 @@ class GRUKernel : public framework::OpKernel<T> {
bool is_reverse = context.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& dev_ctx = context.template device_context<DeviceContext>();
to_batch(dev_ctx, *input, *batch_gate, true, is_reverse);
to_batch(dev_ctx, *input, batch_gate, true, is_reverse);
if (bias) {
math::RowwiseAdd<DeviceContext, T> add_bias;
......@@ -113,7 +113,7 @@ class GRUKernel : public framework::OpKernel<T> {
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden->set_lod(batch_gate->lod());
to_seq(dev_ctx, *batch_hidden, *hidden);
to_seq(dev_ctx, *batch_hidden, hidden);
}
void Compute(const framework::ExecutionContext& context) const override {
......@@ -174,7 +174,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
bool is_reverse = context.Attr<bool>("is_reverse");
batch_hidden_grad.set_lod(batch_hidden->lod());
to_batch(dev_ctx, *hidden_grad, batch_hidden_grad, false, is_reverse);
to_batch(dev_ctx, *hidden_grad, &batch_hidden_grad, false, is_reverse);
math::GRUMetaValue<T> gru_value;
gru_value.gate_weight = const_cast<T*>(weight_data);
......@@ -236,7 +236,7 @@ class GRUGradKernel : public framework::OpKernel<T> {
input_grad->mutable_data<T>(context.GetPlace());
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_gate_grad.set_lod(batch_gate->lod());
to_seq(dev_ctx, batch_gate_grad, *input_grad);
to_seq(dev_ctx, batch_gate_grad, input_grad);
}
if (bias_grad) {
bias_grad->mutable_data<T>(context.GetPlace());
......
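In the GRU kernel changes above, names like batch_gate and hidden are already pointers, so the updated call sites simply drop the `*`, while locals gain an `&`. A small illustration of how call sites adapt once an output parameter becomes a pointer; Fill and Caller are made-up names, not Paddle code.

```cpp
#include <string>

// Hypothetical sketch: output parameter passed by pointer.
void Fill(const std::string& src, std::string* dst) { *dst = src; }

void Caller(std::string* already_a_pointer) {
  std::string local;
  Fill("from object", &local);              // local object: take its address
  Fill("from pointer", already_a_pointer);  // pointer: pass it through, no '*'
}

int main() {
  std::string s;
  Caller(&s);
  return 0;
}
```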
......@@ -33,7 +33,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index_lod, *dst, indexed_src);
row_shuffle(ctx, src, index_lod, dst, indexed_src);
}
template <typename DeviceContext, typename T>
......@@ -57,7 +57,7 @@ class LSTMKernel : public framework::OpKernel<T> {
bool is_reverse = ctx.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& device_ctx = ctx.template device_context<DeviceContext>();
to_batch(device_ctx, *input, *batch_gate, true, is_reverse);
to_batch(device_ctx, *input, batch_gate, true, is_reverse);
auto in_dims = input->dims();
int frame_size = static_cast<int>(in_dims[1] / 4);
......@@ -161,11 +161,11 @@ class LSTMKernel : public framework::OpKernel<T> {
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_hidden.set_lod(batch_gate->lod());
// restore the output hidden in LoDTensor from the batch hidden
to_seq(device_ctx, batch_hidden, *hidden_out);
to_seq(device_ctx, batch_hidden, hidden_out);
batch_cell.set_lod(batch_gate->lod());
// restore the output cell state in LoDTensor from the batch cell
to_seq(device_ctx, batch_cell, *cell_out);
to_seq(device_ctx, batch_cell, cell_out);
}
};
......@@ -257,7 +257,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
const framework::DDim& dims, framework::LoDTensor& dst) {
dst.mutable_data<T>(dims, ctx.GetPlace());
dst.set_lod(batch_gate->lod());
to_batch(ctx, src, dst, false);
to_batch(ctx, src, &dst, false);
};
LoDTensor batch_hidden, batch_hidden_g, batch_cell;
......@@ -351,7 +351,7 @@ class LSTMGradKernel : public framework::OpKernel<T> {
if (in_g) {
/* backward data */
in_g->mutable_data<T>(ctx.GetPlace());
to_seq(device_ctx, batch_gate_g, *in_g);
to_seq(device_ctx, batch_gate_g, in_g);
}
if (bias && bias_g) {
/* backward bias */
......
......@@ -40,7 +40,7 @@ inline void ReorderInitState(const DeviceContext& ctx,
framework::Tensor* dst, bool indexed_src) {
math::CopyMatrixRowsFunctor<DeviceContext, T> row_shuffle;
dst->mutable_data<T>(src.dims(), ctx.GetPlace());
row_shuffle(ctx, src, index, *dst, indexed_src);
row_shuffle(ctx, src, index, dst, indexed_src);
}
template <typename DeviceContext, typename T>
......@@ -81,7 +81,7 @@ class LSTMPKernel : public framework::OpKernel<T> {
bool is_reverse = ctx.Attr<bool>("is_reverse");
math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
auto& device_ctx = ctx.template device_context<DeviceContext>();
to_batch(device_ctx, *input, *batch_gate, true, is_reverse);
to_batch(device_ctx, *input, batch_gate, true, is_reverse);
auto in_dims = input->dims();
int frame_size = static_cast<int>(in_dims[1] / 4);
......@@ -208,11 +208,11 @@ class LSTMPKernel : public framework::OpKernel<T> {
math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
batch_proj.set_lod(batch_gate->lod());
// restore the output hidden in LoDTensor from the batch hidden
to_seq(device_ctx, batch_proj, *proj_out);
to_seq(device_ctx, batch_proj, proj_out);
batch_cell.set_lod(batch_gate->lod());
// restore the output cell state in LoDTensor from the batch cell
to_seq(device_ctx, batch_cell, *cell_out);
to_seq(device_ctx, batch_cell, cell_out);
}
};
......@@ -332,7 +332,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
const framework::DDim& dims, framework::LoDTensor& dst) {
dst.mutable_data<T>(dims, ctx.GetPlace());
dst.set_lod(batch_gate->lod());
to_batch(ctx, src, dst, false);
to_batch(ctx, src, &dst, false);
};
LoDTensor batch_hidden_g, batch_proj, batch_proj_g, batch_cell;
......@@ -471,7 +471,7 @@ class LSTMPGradKernel : public framework::OpKernel<T> {
if (in_g) {
/* backward data */
in_g->mutable_data<T>(ctx.GetPlace());
to_seq(device_ctx, batch_gate_g, *in_g);
to_seq(device_ctx, batch_gate_g, in_g);
}
if (bias && bias_g) {
/* backward bias */
......
......@@ -17,17 +17,14 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
using namespace paddle::framework;
using namespace paddle::platform;
template <typename DeviceContext, typename Place>
void testConcat() {
Tensor input_a_cpu;
Tensor input_b_cpu;
Tensor out_cpu;
Tensor input_a;
Tensor input_b;
Tensor out;
paddle::framework::Tensor input_a_cpu;
paddle::framework::Tensor input_b_cpu;
paddle::framework::Tensor out_cpu;
paddle::framework::Tensor input_a;
paddle::framework::Tensor input_b;
paddle::framework::Tensor out;
DeviceContext* context = new DeviceContext(Place());
// DeviceContext context(Place());
......@@ -40,18 +37,18 @@ void testConcat() {
* output:
* out.shape: [5, 3, 4]
*/
auto dim_a = make_ddim({2, 3, 4});
auto dim_b = make_ddim({3, 3, 4});
auto dim_out = make_ddim({5, 3, 4});
auto dim_a = paddle::framework::make_ddim({2, 3, 4});
auto dim_b = paddle::framework::make_ddim({3, 3, 4});
auto dim_out = paddle::framework::make_ddim({5, 3, 4});
input_a.mutable_data<int>(dim_a, Place());
input_b.mutable_data<int>(dim_b, Place());
out.mutable_data<int>(dim_out, Place());
if (paddle::platform::is_gpu_place(Place())) {
input_a_cpu.mutable_data<int>(dim_a, CPUPlace());
input_b_cpu.mutable_data<int>(dim_b, CPUPlace());
out_cpu.mutable_data<int>(dim_out, CPUPlace());
input_a_cpu.mutable_data<int>(dim_a, paddle::platform::CPUPlace());
input_b_cpu.mutable_data<int>(dim_b, paddle::platform::CPUPlace());
out_cpu.mutable_data<int>(dim_out, paddle::platform::CPUPlace());
}
int* a_ptr;
......@@ -72,11 +69,11 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(input_a_cpu, Place(), &input_a);
TensorCopySync(input_b_cpu, Place(), &input_b);
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
}
std::vector<Tensor> input;
std::vector<paddle::framework::Tensor> input;
input.push_back(input_a);
input.push_back(input_b);
......@@ -89,7 +86,8 @@ void testConcat() {
int* out_ptr;
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(out, CPUPlace(), &out_cpu);
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -115,9 +113,9 @@ void testConcat() {
* output:
* out.shape: [2, 7, 4]
*/
dim_a = make_ddim({2, 3, 4});
dim_b = make_ddim({2, 4, 4});
dim_out = make_ddim({2, 7, 4});
dim_a = paddle::framework::make_ddim({2, 3, 4});
dim_b = paddle::framework::make_ddim({2, 4, 4});
dim_out = paddle::framework::make_ddim({2, 7, 4});
input_a.Resize(dim_a);
input_b.Resize(dim_b);
......@@ -144,8 +142,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(input_a_cpu, Place(), &input_a);
TensorCopySync(input_b_cpu, Place(), &input_b);
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
}
input.clear();
......@@ -159,7 +157,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(out, CPUPlace(), &out_cpu);
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -187,9 +186,9 @@ void testConcat() {
* output:
* out.shape: [2, 3, 9]
*/
dim_a = make_ddim({2, 3, 4});
dim_b = make_ddim({2, 3, 5});
dim_out = make_ddim({2, 3, 9});
dim_a = paddle::framework::make_ddim({2, 3, 4});
dim_b = paddle::framework::make_ddim({2, 3, 5});
dim_out = paddle::framework::make_ddim({2, 3, 9});
input_a.Resize(dim_a);
input_b.Resize(dim_b);
......@@ -216,8 +215,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(input_a_cpu, Place(), &input_a);
TensorCopySync(input_b_cpu, Place(), &input_b);
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
}
input.clear();
......@@ -231,7 +230,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(out, CPUPlace(), &out_cpu);
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......@@ -261,9 +261,9 @@ void testConcat() {
* output:
* out.shape: [2, 6, 4]
*/
dim_a = make_ddim({2, 3, 4});
dim_b = make_ddim({2, 3, 4});
dim_out = make_ddim({2, 6, 4});
dim_a = paddle::framework::make_ddim({2, 3, 4});
dim_b = paddle::framework::make_ddim({2, 3, 4});
dim_out = paddle::framework::make_ddim({2, 6, 4});
input_a.Resize(dim_a);
input_b.Resize(dim_b);
......@@ -290,8 +290,8 @@ void testConcat() {
}
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(input_a_cpu, Place(), &input_a);
TensorCopySync(input_b_cpu, Place(), &input_b);
paddle::framework::TensorCopy(input_a_cpu, Place(), *context, &input_a);
paddle::framework::TensorCopy(input_b_cpu, Place(), *context, &input_b);
}
input.clear();
......@@ -305,7 +305,8 @@ void testConcat() {
PADDLE_ENFORCE_EQ(input_b.dims(), dim_b);
if (paddle::platform::is_gpu_place(Place())) {
TensorCopySync(out, CPUPlace(), &out_cpu);
paddle::framework::TensorCopy(out, paddle::platform::CPUPlace(), *context,
&out_cpu);
out_ptr = out_cpu.data<int>();
} else {
out_ptr = out.data<int>();
......
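The concat test above also drops its two using-directives, which cpplint reports under build/namespaces, and spells out paddle::framework and paddle::platform instead. A hedged sketch of the alternatives the style guide allows, using an invented mylib namespace rather than the real Paddle one:

```cpp
// Stand-in types; mylib is illustrative only.
namespace mylib {
namespace framework {
struct Tensor { int size = 0; };
}  // namespace framework
}  // namespace mylib

// Flagged by cpplint (build/namespaces):
// using namespace mylib::framework;

// Acceptable alternatives:
mylib::framework::Tensor t_fully_qualified;  // fully qualified name
namespace fw = mylib::framework;             // narrow namespace alias
fw::Tensor t_via_alias;

int main() { return t_fully_qualified.size + t_via_alias.size; }
```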
......@@ -108,7 +108,9 @@ class CrossEntropyFunctor<platform::CUDADeviceContext, T> {
if (softLabel) {
const T* label_data = labels->data<T>();
int block = class_num > 512 ? 512 : pow(2, int(std::log2(class_num)));
int block = class_num > 512
? 512
: pow(2, static_cast<int>(std::log2(class_num)));
SoftCrossEntropyKernel<T><<<
batch_size, block, block * sizeof(T),
......
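The cross-entropy change replaces the function-style cast `int(...)` with `static_cast<int>(...)`, which is what cpplint's readability/casting check asks for. A small self-contained sketch of the same block-size computation; BlockSize is a hypothetical wrapper, only class_num comes from the snippet above.

```cpp
#include <cmath>
#include <cstdio>

// Pick a power-of-two CUDA block size capped at 512.
int BlockSize(int class_num) {
  // Flagged form: pow(2, int(std::log2(class_num)));
  return class_num > 512
             ? 512
             : static_cast<int>(
                   std::pow(2, static_cast<int>(std::log2(class_num))));
}

int main() {
  std::printf("%d\n", BlockSize(300));  // prints 256
  return 0;
}
```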
......@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <type_traits>
#include "paddle/fluid/operators/math/detail/activation_functions.h"
#include "paddle/fluid/operators/math/lstm_compute.h"
#include "paddle/fluid/platform/cuda_helper.h"
#include "paddle/fluid/platform/device_context.h"
#include <type_traits>
namespace paddle {
namespace operators {
namespace math {
......
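Here the only change is include order: `<type_traits>` moves ahead of the project headers, matching cpplint's build/include_order grouping (related header, C headers, C++ standard headers, then other project headers). An illustrative skeleton, with foo/bar.h standing in for a project header:

```cpp
// #include "foo/bar.h"        // 1. the file's own header (in foo/bar.cc)
#include <stdint.h>            // 2. C system headers
#include <type_traits>         // 3. C++ standard library headers
#include <vector>
// #include "foo/other_dep.h"  // 4. other project headers

int main() { return std::is_integral<int>::value ? 0 : 1; }
```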
......@@ -13,9 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <memory>
#include <random>
typedef long int64;
namespace paddle {
namespace operators {
namespace math {
......@@ -27,25 +27,25 @@ namespace math {
*/
class Sampler {
public:
explicit Sampler(int64 range) : range_(range) {
explicit Sampler(int64_t range) : range_(range) {
PADDLE_ENFORCE_GT(range, 0);
std::random_device r;
seed_ = r();
}
explicit Sampler(int64 range, unsigned int seed)
explicit Sampler(int64_t range, unsigned int seed)
: range_(range), seed_(seed) {
PADDLE_ENFORCE_GT(range, 0);
}
virtual ~Sampler();
// Sample a single value
virtual int64 Sample() const = 0;
virtual int64_t Sample() const = 0;
// The probability that a single call to Sample() returns the given value.
virtual float Probability(int64 value) const = 0;
virtual float Probability(int64_t value) const = 0;
int64 range() { return range_; };
int64 range() { return range_; }
protected:
const int64 range_;
const int64_t range_;
unsigned int seed_;
};
......@@ -56,15 +56,15 @@ class Sampler {
*/
class UniformSampler : public Sampler {
public:
explicit UniformSampler(int64 range);
explicit UniformSampler(int64_t range);
explicit UniformSampler(int64 range, unsigned int seed);
explicit UniformSampler(int64_t range, unsigned int seed);
~UniformSampler() override {}
int64 Sample() const override;
float Probability(int64 value) const override;
float Probability(int64_t value) const override;
private:
const float inv_range_;
......@@ -79,15 +79,15 @@ class UniformSampler : public Sampler {
*/
class LogUniformSampler : public Sampler {
public:
explicit LogUniformSampler(int64 range);
explicit LogUniformSampler(int64_t range);
explicit LogUniformSampler(int64 range, unsigned int seed);
explicit LogUniformSampler(int64_t range, unsigned int seed);
~LogUniformSampler() override {}
int64 Sample() const override;
float Probability(int64 value) const override;
float Probability(int64_t value) const override;
private:
const float log_range_;
......@@ -95,6 +95,6 @@ class LogUniformSampler : public Sampler {
std::shared_ptr<std::uniform_real_distribution<>> dist_;
};
} // math
} // namespace math
} // namespace operators
} // namespace paddle
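sampler.h drops the local `typedef long int64` in favor of int64_t from `<cstdint>`, removes the stray semicolon after the inline range() body, and completes the closing-namespace comment. A condensed sketch of those three fixes with an invented demo::Counter class:

```cpp
#include <cstdint>  // standard fixed-width type instead of "typedef long int64"

namespace demo {

class Counter {
 public:
  explicit Counter(int64_t range) : range_(range) {}
  int64_t range() const { return range_; }  // no trailing ';' after the body

 private:
  const int64_t range_;
};

}  // namespace demo  <- cpplint expects "namespace <name>", not just "<name>"

int main() {
  demo::Counter c(10);
  return c.range() == 10 ? 0 : 1;
}
```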
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <set>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
......
......@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <set>
#include <vector>
#include "paddle/fluid/operators/math/math_function.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
......
......@@ -23,11 +23,11 @@ class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor& dst,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index) {
size_t* index = index_lod.data();
auto src_dims = src.dims();
auto dst_dims = dst.dims();
auto dst_dims = dst->dims();
PADDLE_ENFORCE_EQ(src_dims.size(), 2UL,
"The src must be matrix with rank 2.");
PADDLE_ENFORCE_EQ(dst_dims.size(), 2UL,
......@@ -37,7 +37,7 @@ class CopyMatrixRowsFunctor<platform::CPUDeviceContext, T> {
auto height = dst_dims[0];
auto width = dst_dims[1];
auto* src_data = src.data<T>();
auto* dst_data = dst.data<T>();
auto* dst_data = dst->data<T>();
for (int i = 0; i < height; ++i) {
if (is_src_index) {
memcpy(dst_data + i * width, src_data + index[i] * width,
......
......@@ -43,10 +43,10 @@ class CopyMatrixRowsFunctor<platform::CUDADeviceContext, T> {
public:
void operator()(const platform::CUDADeviceContext& context,
const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor& dst,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index) {
auto src_dims = src.dims();
auto dst_dims = dst.dims();
auto dst_dims = dst->dims();
PADDLE_ENFORCE_EQ(src_dims.size(), 2,
"The src must be matrix with rank 2.");
PADDLE_ENFORCE_EQ(dst_dims.size(), 2,
......@@ -56,7 +56,7 @@ class CopyMatrixRowsFunctor<platform::CUDADeviceContext, T> {
auto height = dst_dims[0];
auto width = dst_dims[1];
auto* src_data = src.data<T>();
auto* dst_data = dst.data<T>();
auto* dst_data = dst->data<T>();
dim3 threads(128, 8);
dim3 grid(8, 1);
......
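Inside the CPU and CUDA definitions of CopyMatrixRowsFunctor, the only body change is member access switching from `.` to `->` once dst is a pointer. A tiny standalone example of the same mechanical change; Resize and the vector type are placeholders, not Paddle APIs.

```cpp
#include <cstddef>
#include <vector>

void Resize(std::vector<int>* dst, std::size_t n) {
  // Before (reference parameter): dst.resize(n);
  dst->resize(n);                // pointer parameter: arrow access
  dst->assign(dst->size(), 0);   // zero-fill via the same pointer
}

int main() {
  std::vector<int> v;
  Resize(&v, 4);
  return static_cast<int>(v.size()) == 4 ? 0 : 1;
}
```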
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/tensor.h"
......@@ -35,7 +37,7 @@ class CopyMatrixRowsFunctor {
// copy the input src to the indexed rows of output dst.
// The indexed rows are based on the input index.
void operator()(const DeviceContext& context, const framework::Tensor& src,
framework::Vector<size_t> index_lod, framework::Tensor& dst,
framework::Vector<size_t> index_lod, framework::Tensor* dst,
bool is_src_index);
};
......@@ -58,10 +60,10 @@ class LoDTensor2BatchFunctor {
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& lod_tensor,
framework::LoDTensor& batch, bool is_cal_batch_lod,
framework::LoDTensor* batch, bool is_cal_batch_lod,
bool is_reverse = false) const {
if (!is_cal_batch_lod) {
auto lods = batch.lod();
auto lods = batch->lod();
PADDLE_ENFORCE_GT(lods.size(), 2UL);
PADDLE_ENFORCE_EQ(lods[1].size(),
static_cast<size_t>(lod_tensor.dims()[0]));
......@@ -141,7 +143,7 @@ class LoDTensor2BatchFunctor {
for (size_t i = 0; i < seq_info.size(); ++i) {
seq_order[i] = seq_info[i].seq_idx;
}
batch.set_lod(batch_lods);
batch->set_lod(batch_lods);
CopyMatrixRowsFunctor<DeviceContext, T> to_batch;
to_batch(context, lod_tensor, batch_lods[1], batch, true);
......@@ -153,11 +155,11 @@ class Batch2LoDTensorFunctor {
public:
void operator()(const DeviceContext& context,
const framework::LoDTensor& batch,
framework::LoDTensor& lod_tensor) const {
framework::LoDTensor* lod_tensor) const {
auto in_lod = batch.lod();
PADDLE_ENFORCE_GT(in_lod.size(), 2UL);
PADDLE_ENFORCE_EQ(in_lod[1].size(),
static_cast<size_t>(lod_tensor.dims()[0]));
static_cast<size_t>(lod_tensor->dims()[0]));
CopyMatrixRowsFunctor<DeviceContext, T> to_seq;
to_seq(context, batch, in_lod[1], lod_tensor, false);
}
......
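Because sequence2batch declares its functors in a header and defines device-specific versions in the .cc and .cu files, the signature change has to land in the header, both definitions, and every call site at once. A compact sketch of that structure, with invented CopyRows/CPUTag names standing in for the Paddle functors:

```cpp
#include <vector>

struct CPUTag {};  // stand-in for a device context type

// Generic declaration, as in the .h file.
template <typename Device, typename T>
struct CopyRows {
  void operator()(const std::vector<T>& src, std::vector<T>* dst);
};

// Device-specific definition, as in the .cc (or .cu) file.
template <typename T>
struct CopyRows<CPUTag, T> {
  void operator()(const std::vector<T>& src, std::vector<T>* dst) {
    *dst = src;
  }
};

int main() {
  std::vector<int> src = {1, 2, 3};
  std::vector<int> dst;
  CopyRows<CPUTag, int>()(src, &dst);  // call site updated to pass a pointer
  return static_cast<int>(dst.size()) == 3 ? 0 : 1;
}
```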
......@@ -21,15 +21,15 @@ namespace math {
template <typename T>
class ScaleLoDTensorFunctor<platform::CPUDeviceContext, T> {
public:
void operator()(const platform::CPUDeviceContext& context,
framework::LoDTensor& seq, const T* scales) {
void operator()(const platform::CPUDeviceContext& context, const T* scales,
framework::LoDTensor* seq) {
const size_t level = 0;
auto lod = seq.lod();
auto lod = seq->lod();
const size_t num_seq = lod[level].size() - 1;
size_t seq_width = seq.dims()[1];
size_t seq_width = seq->dims()[1];
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
T* seq_data = seq.mutable_data<T>(context.GetPlace());
T* seq_data = seq->mutable_data<T>(context.GetPlace());
for (size_t i = 0; i < num_seq; ++i) {
for (size_t j = lod[level][i] * seq_width;
j < lod[level][i + 1] * seq_width; ++j) {
......
......@@ -35,14 +35,14 @@ __global__ void SequenceScaleKernel(T* seq, size_t* lod, const T* scales,
template <typename T>
class ScaleLoDTensorFunctor<platform::CUDADeviceContext, T> {
public:
void operator()(const platform::CUDADeviceContext& context,
framework::LoDTensor& seq, const T* scales) {
void operator()(const platform::CUDADeviceContext& context, const T* scales,
framework::LoDTensor* seq) {
const size_t level = 0;
auto lod = seq.lod();
auto lod = seq->lod();
const size_t num_seq = lod[level].size() - 1;
const size_t seq_width = seq.numel() / seq.dims()[0];
const size_t seq_width = seq->numel() / seq->dims()[0];
framework::LoD abs_offset_lod = framework::ToAbsOffset(lod);
T* seq_data = seq.mutable_data<T>(context.GetPlace());
T* seq_data = seq->mutable_data<T>(context.GetPlace());
SequenceScaleKernel<T, PADDLE_CUDA_NUM_THREADS><<<
num_seq, PADDLE_CUDA_NUM_THREADS, 0, context.stream()>>>(
......
......@@ -46,8 +46,8 @@ namespace math {
template <typename DeviceContext, typename T>
class ScaleLoDTensorFunctor {
public:
void operator()(const DeviceContext& context, framework::LoDTensor& seq,
const T* scales);
void operator()(const DeviceContext& context, const T* scales,
framework::LoDTensor* seq);
};
} // namespace math
......
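The ScaleLoDTensorFunctor signature is also reordered so the input scales come before the mutated tensor, following the inputs-before-outputs parameter convention in the Google style guide that cpplint is based on. A minimal sketch with a hypothetical ScaleInPlace functor operating on raw arrays:

```cpp
#include <cstddef>

struct ScaleInPlace {
  // Before (flagged): operator()(Context&, Data& seq, const float* scales)
  // After: inputs first, output pointer last.
  void operator()(const float* scales, std::size_t n, float* seq) const {
    for (std::size_t i = 0; i < n; ++i) seq[i] *= scales[i];
  }
};

int main() {
  float scales[3] = {2.f, 3.f, 4.f};
  float seq[3] = {1.f, 1.f, 1.f};
  ScaleInPlace()(scales, 3, seq);
  return seq[2] == 4.f ? 0 : 1;
}
```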
......@@ -15,9 +15,9 @@ limitations under the License. */
#pragma once
#include <algorithm>
#include <condition_variable>
#include <condition_variable> // NOLINT
#include <memory>
#include <mutex>
#include <mutex> // NOLINT
#include <string>
#include <unordered_map>
#include <vector>
......
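The `// NOLINT` suffixes above silence cpplint's build/c++11 warning, which flags `<mutex>` and `<condition_variable>` as headers the Google style guide restricts; the includes themselves are unchanged. A minimal example of the same suppression, assuming cpplint's default configuration:

```cpp
#include <condition_variable>  // NOLINT  (suppresses build/c++11 on this line)
#include <mutex>               // NOLINT

std::mutex m;
std::condition_variable cv;

int main() {
  std::lock_guard<std::mutex> guard(m);  // exercise the header so it is used
  return 0;
}
```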
......@@ -222,8 +222,8 @@ class WarpCTCGradKernel : public framework::OpKernel<T> {
const T* loss_grad_data = loss_grad->data<T>();
math::ScaleLoDTensorFunctor<DeviceContext, T>()(
ctx.template device_context<DeviceContext>(), *logits_grad,
loss_grad_data);
ctx.template device_context<DeviceContext>(), loss_grad_data,
logits_grad);
}
};
......