Unverified commit 77734ce7 authored by H huzhiqiang, committed by GitHub

[x86] Fix x86 code style (#3287)

Parent 720590c9
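For context on the recurring edits below: most hunks insert C++'s `template` disambiguator into calls such as `out->template mutable_data<T>()`. Inside a class or function template, when the object expression's type depends on a template parameter, the compiler would otherwise parse `mutable_data<T>` as a less-than comparison rather than a member function template call. A minimal, self-contained sketch of that rule; the `Buffer` and `fill` names are illustrative, not Paddle-Lite APIs:

```cpp
#include <cstddef>
#include <vector>

// A toy stand-in for a tensor-like type with a member function template.
struct Buffer {
  std::vector<unsigned char> bytes;
  template <typename T>
  T* mutable_data(std::size_t n) {
    bytes.resize(n * sizeof(T));
    return reinterpret_cast<T*>(bytes.data());
  }
};

// `buf` has a dependent type here, so the member template call must be
// spelled with the `template` keyword; without it, `buf->mutable_data < T`
// parses as a comparison and fails to compile on GCC/Clang.
template <typename T, typename BufPtr>
T* fill(BufPtr buf, std::size_t n, T value) {
  T* data = buf->template mutable_data<T>(n);
  for (std::size_t i = 0; i < n; ++i) data[i] = value;
  return data;
}

int main() {
  Buffer b;
  float* p = fill(&b, 4, 1.5f);
  return (p[0] == 1.5f && p[3] == 1.5f) ? 0 : 1;
}
```

The other recurring change widens LoD offset vectors from `size_t` to `uint64_t`; a small sketch of that alias follows the `paddle::lite::fluid` LoD hunk further down.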
......@@ -10,6 +10,7 @@ if (LITE_ON_TINY_PUBLISH)
endif()
set(light_lib_DEPS light_api paddle_api paddle_api_light)
if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH_BM OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
#full api dynamic library
lite_cc_library(paddle_full_api_shared SHARED SRCS paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc
......@@ -264,8 +265,6 @@ if (NOT LITE_ON_TINY_PUBLISH)
NPU_DEPS ${npu_kernels}
CL_DEPS ${opencl_kernels}
FPGA_DEPS ${fpga_kernels}
CV_DEPS paddle_cv_arm
NPU_DEPS ${npu_kernels}
BM_DEPS ${bm_kernels})
# The final inference library for just MobileConfig.
bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
......
......@@ -96,8 +96,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
// : nullptr;
// fill in data
std::vector<size_t> low_level;
size_t low_offset = 0;
std::vector<uint64_t> low_level;
uint64_t low_offset = 0;
for (auto &items : selected_items) {
low_level.push_back(low_offset);
for (auto &item : items) {
......
......@@ -22,8 +22,8 @@ void PrepareCPUTensors(paddle::framework::LoDTensor* ids,
paddle::framework::LoDTensor* pre_scores) {
// lod
paddle::framework::LoD lod;
std::vector<size_t> level0({0, 2, 4});
std::vector<size_t> level1({0, 1, 2, 3, 4});
std::vector<uint64_t> level0({0, 2, 4});
std::vector<uint64_t> level1({0, 1, 2, 3, 4});
lod.push_back(level0);
lod.push_back(level1);
ids->set_lod(lod);
......
......@@ -483,7 +483,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->mutable_data<T>());
mat_out->template mutable_data<T>());
}
template <>
......@@ -759,7 +759,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->mutable_data<T>());
mat_out->template mutable_data<T>());
} else {
PADDLE_ENFORCE(dim_a.batch_size_ == dim_b.batch_size_ ||
dim_a.batch_size_ == 0 || dim_b.batch_size_ == 0);
......@@ -773,7 +773,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
mat_a.data<T>(),
mat_b.data<T>(),
beta,
mat_out->mutable_data<T>(),
mat_out->template mutable_data<T>(),
dim_a.batch_size_ == 0 ? dim_b.batch_size_ : dim_a.batch_size_,
dim_a.stride_,
dim_b.stride_);
......
......@@ -51,7 +51,7 @@ class ConcatFunctor<lite::TargetType::kX86, T> {
// auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
// computation
auto output_data = output->mutable_data<T>();
auto output_data = output->template mutable_data<T>();
int col_idx = 0;
for (int j = 0; j < num; ++j) {
int col_len = input_cols[j];
......@@ -108,7 +108,7 @@ class SplitFunctor<lite::TargetType::kX86, T> {
int col_len = output_cols[j];
auto* out_tensor = outputs->at(j);
if (out_tensor != nullptr) {
T* dst_ptr = out_tensor->mutable_data<T>() + k * col_len;
T* dst_ptr = out_tensor->template mutable_data<T>() + k * col_len;
std::copy_n(src_ptr + col_idx, col_len, dst_ptr);
// memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
// sizeof(T) * col_len);
......
......@@ -50,8 +50,8 @@ class CrossEntropyFunctor<lite::TargetType::kX86, T> {
.reshape(batch_axis_remain)
.sum(Eigen::DSizes<int, 1>(1)));
} else {
const T* prob_data = prob->data<T>();
T* loss_data = out->mutable_data<T>();
const T* prob_data = prob->template data<T>();
T* loss_data = out->template mutable_data<T>();
const int64_t* label_data = labels->data<int64_t>();
for (int i = 0; i < batch_size; ++i) {
......
......@@ -99,7 +99,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kCFO,
int channels_col = im_channels * filter_height * filter_width;
T* im_data = im->mutable_data<T>();
T* im_data = im->template mutable_data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
......@@ -161,7 +161,7 @@ class Im2ColFunctor<lite::x86::math::ColFormat::kOCF,
int col_width = col->dims()[1];
const T* im_data = im.data<T>();
T* col_data = col->mutable_data<T>();
T* col_data = col->template mutable_data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
......@@ -235,7 +235,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kOCF,
"col_width and padding(padding_left, padding_right) are "
"inconsistent.");
T* im_data = im->mutable_data<T>();
T* im_data = im->template mutable_data<T>();
const T* col_data = col.data<T>();
for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
......
......@@ -42,7 +42,7 @@ inline void im2col_common(const lite::Tensor& im,
int channels_col = im_channels * filter_height * filter_width;
const T* im_data = im.data<T>();
T* col_data = col->mutable_data<T>();
T* col_data = col->template mutable_data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
int h_offset = (c / filter_width) % filter_height;
......@@ -77,7 +77,7 @@ inline void im2col_sh1sw1dh1dw1ph0pw0(const lite::Tensor& im,
int output_width = col->dims()[4];
const T* im_data = im.data<T>();
T* col_data = col->mutable_data<T>();
T* col_data = col->template mutable_data<T>();
int col_matrix_width = output_width * output_height;
int im_size = im_height * im_width;
size_t copy_size = sizeof(T) * output_width;
......@@ -123,7 +123,7 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const lite::Tensor& im,
constexpr int prw = 1;
const T* im_data = im.data<T>();
T* col_data = col->mutable_data<T>();
T* col_data = col->template mutable_data<T>();
int im_size = im_height * im_width;
int col_matrix_width = output_width * output_height;
int col_block_fh = filter_width * col_matrix_width; // fw*oh*ow
......
......@@ -65,7 +65,7 @@ struct TensorSetConstantCPU {
: tensor_(tensor), value_(value) {}
template <typename T>
void apply() const {
auto* begin = tensor_->mutable_data<T>(lite::TargetType::kX86);
auto* begin = tensor_->template mutable_data<T>(lite::TargetType::kX86);
std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
}
lite::Tensor* tensor_;
......@@ -126,7 +126,7 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
const T* input_data = input.data<T>();
const T* vector_data = vector.data<T>();
T* output_data = output->mutable_data<T>();
T* output_data = output->template mutable_data<T>();
for (int64_t i = 0; i < in_dims[0]; ++i) {
for (int64_t j = 0; j < size; ++j) {
output_data[i * in_dims[0] + j] =
......
......@@ -83,7 +83,7 @@ class ColwiseSum<lite::TargetType::kX86, T> {
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), size);
T* out_buf = out->mutable_data<T>(out->target());
T* out_buf = out->template mutable_data<T>(out->target());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......@@ -129,7 +129,7 @@ class RowwiseMean<lite::TargetType::kX86, T> {
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), height);
auto inv_size = 1.0 / size;
T* out_buf = out->mutable_data<T>(out->target());
T* out_buf = out->template mutable_data<T>(out->target());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......@@ -173,7 +173,7 @@ class RowwiseSum<lite::TargetType::kX86, T> {
auto size = in_dims[1];
PADDLE_ENFORCE_EQ(out->numel(), height);
T* out_buf = out->mutable_data<T>(out->target());
T* out_buf = out->template mutable_data<T>(out->target());
const T* in_buf = input.data<T>();
for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......
......@@ -35,7 +35,7 @@ class MaxOutFunctor<lite::TargetType::kX86, T> {
// c_size means the output size of each sample
int c_size = fea_size * output_channels;
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
for (int i = 0; i < batch_size; ++i) {
int new_bindex = c_size * i;
......@@ -72,7 +72,8 @@ class MaxOutGradFunctor<lite::TargetType::kX86, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
for (int i = 0; i < batch_size; ++i) {
int blen = fea_size * output_channels * i;
......
......@@ -54,8 +54,8 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int input_stride = input_height * input_width;
const int output_stride = output_height * output_width;
const T* input_data = input->data<T>();
T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
const T* input_data = input->template data<T>();
T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
int hstart, hend;
int wstart, wend;
......@@ -137,7 +137,8 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
int hstart, hend;
int wstart, wend;
......@@ -220,7 +221,8 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......@@ -322,7 +324,7 @@ class Pool3dFunctor<lite::TargetType::kX86, PoolProcess, T> {
const int output_stride = output_depth * output_height * output_width;
const T* input_data = input.data<T>();
T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
int dstart, dend;
int hstart, hend;
......@@ -425,7 +427,8 @@ class Pool3dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
int dstart, dend;
int hstart, hend;
......@@ -530,7 +533,8 @@ class MaxPool3dGradFunctor<lite::TargetType::kX86, T> {
const T* input_data = input.data<T>();
const T* output_data = output.data<T>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
for (int i = 0; i < batch_size; i++) {
for (int c = 0; c < output_channels; ++c) {
......
......@@ -58,11 +58,11 @@ class SampleWithProb {
const int64_t* label_data = L->data<int64_t>();
// int64_t* samples_data =
// S->mutable_data<int64_t>(ret_dim, Target);
// T* probabilities_data = P->mutable_data<T>(ret_dim, Target);
// T* probabilities_data = P->template mutable_data<T>(ret_dim, Target);
S->Resize({batch_size, num_sampled_classes});
auto* samples_data = S->mutable_data<int64_t>(Target);
P->Resize({batch_size, num_sampled_classes});
auto* probabilities_data = P->mutable_data<T>(Target);
auto* probabilities_data = P->template mutable_data<T>(Target);
// temp sets for unique sampling
std::unordered_set<int64_t> tmp_samples;
......
......@@ -42,7 +42,7 @@ class SearchFcFunctor<lite::TargetType::kX86, T> {
lite::DDim dims(std::vector<int64_t>({bottom.dims()[0], out_size}));
const auto bottom_data = bottom.data<T>();
auto top_data = top->mutable_data<T>(lite::TargetType::kX86);
auto top_data = top->template mutable_data<T>(lite::TargetType::kX86);
const auto weights = w.data<T>();
auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
call_gemm<lite::X86Context, T>(blas,
......
......@@ -52,7 +52,7 @@ struct SelectedRowsAdd<lite::TargetType::kX86, T> {
PADDLE_ENFORCE_EQ(in1_row_numel, in2_value.numel() / in2_rows.size());
PADDLE_ENFORCE_EQ(in1_row_numel, out_value->numel() / out_rows.size());
auto* out_data = out_value->mutable_data<T>();
auto* out_data = out_value->template mutable_data<T>();
auto* in1_data = in1_value.data<T>();
std::copy_n(in1_data, in1_value.numel(), out_data);
......@@ -87,7 +87,7 @@ struct SelectedRowsAddTensor<lite::TargetType::kX86, T> {
functor(context, output, 0.0);
auto* in1_data = in1_value.data<T>();
auto* out_data = output->mutable_data<T>();
auto* out_data = output->template mutable_data<T>();
for (size_t i = 0; i < in1_rows.size(); i++) {
for (int64_t j = 0; j < in1_row_numel; j++) {
......@@ -127,7 +127,7 @@ struct SelectedRowsAddTo<lite::TargetType::kX86, T> {
in2_rows.insert(in2_rows.end(), in1_rows.begin(), in1_rows.end());
auto* in1_data = in1_value.data<T>();
auto* in2_data = in2_value->mutable_data<T>();
auto* in2_data = in2_value->template mutable_data<T>();
std::copy_n(in1_data, in1_value.numel(), in2_data + input2_offset);
}
};
......@@ -161,7 +161,7 @@ struct SelectedRowsSumTo<lite::TargetType::kX86, T> {
input2->set_rows(in2_rows);
auto* in2_value = input2->mutable_value();
T* in2_data = in2_value->mutable_data<T>();
T* in2_data = in2_value->template mutable_data<T>();
auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
size_t offset = 0u;
for (size_t i = 0u; i != input1.size(); ++i) {
......@@ -194,7 +194,7 @@ struct SelectedRowsAddToTensor<lite::TargetType::kX86, T> {
PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);
auto* in1_data = in1_value.data<T>();
auto* input2_data = input2->mutable_data<T>();
auto* input2_data = input2->template mutable_data<T>();
for (size_t i = 0; i < in1_rows.size(); i++) {
for (int64_t j = 0; j < in1_row_numel; j++) {
......@@ -305,7 +305,7 @@ struct MergeAdd<lite::TargetType::kX86, T> {
lite::DDim dims(std::vector<int64_t>(
{static_cast<int64_t>(merged_row_set.size()), input_width}));
out.mutable_value()->Resize(dims);
auto* out_data = out.mutable_value()->mutable_data<T>();
auto* out_data = out.mutable_value()->template mutable_data<T>();
if (merged_row_set.size() == row_num && !sorted_result) {
// no duplicated ids, just concat the result together
......@@ -385,7 +385,7 @@ struct UpdateToTensor<lite::TargetType::kX86, T> {
PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);
auto* in1_data = in1_value.data<T>();
auto* input2_data = input2->data<T>();
auto* input2_data = input2->template data<T>();
// FIXME(typhoonzero): use macro fix the below messy code.
switch (op) {
......
......@@ -24,10 +24,10 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
public:
void operator()(const lite::Context<lite::TargetType::kX86>& context,
const lite::Tensor& src,
const std::vector<size_t>& index_lod,
const std::vector<uint64_t>& index_lod,
lite::Tensor* dst,
bool is_src_index) {
const size_t* index = index_lod.data();
const uint64_t* index = index_lod.data();
const auto& src_dims = src.dims();
const auto& dst_dims = dst->dims();
PADDLE_ENFORCE_EQ(
......@@ -39,7 +39,7 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
auto height = dst_dims[0];
auto width = dst_dims[1];
auto* src_data = src.data<T>();
auto* dst_data = dst->mutable_data<T>();
auto* dst_data = dst->template mutable_data<T>();
const int sz = width * sizeof(T);
if (is_src_index) {
for (int i = 0; i < height; ++i) {
......
......@@ -36,7 +36,7 @@ class CopyMatrixRowsFunctor {
// The indexed rows are based on the input index.
void operator()(const lite::Context<Target>& context,
const lite::Tensor& src,
const std::vector<size_t>& index_lod,
const std::vector<uint64_t>& index_lod,
lite::Tensor* dst,
bool is_src_index);
};
......@@ -130,8 +130,8 @@ class LoDTensor2BatchFunctor {
// batch_lods[2] is the sort order for the input LoDTensor.
batch_lods->at(2).resize(seq_info.size());
size_t* batch_starts = batch_lods->at(0).data();
size_t* seq2batch_idx = batch_lods->at(1).data();
auto* batch_starts = batch_lods->at(0).data();
auto* seq2batch_idx = batch_lods->at(1).data();
batch_starts[0] = 0;
for (int n = 0; n < max_seqlen; n++) {
auto batch_id = static_cast<int>(batch_starts[n]);
......@@ -148,7 +148,7 @@ class LoDTensor2BatchFunctor {
}
batch_starts[n + 1] = static_cast<size_t>(batch_id);
}
size_t* seq_order = batch_lods->at(2).data();
auto* seq_order = batch_lods->at(2).data();
for (size_t i = 0; i < seq_info.size(); ++i) {
seq_order[i] = seq_info[i].seq_idx;
}
......
......@@ -22,15 +22,15 @@ namespace math {
template <typename T>
void CopyValidData(lite::Tensor* dst_tensor,
const lite::Tensor* src_tensor,
const std::vector<size_t>& seq_offsets,
const std::vector<uint64_t>& seq_offsets,
int pad_seq_len,
int step_width,
bool norm_by_len,
CopyType type,
PadLayout layout) {
int seq_num = seq_offsets.size() - 1;
const T* src_data = src_tensor->data<T>();
T* dst_data = dst_tensor->mutable_data<T>();
const T* src_data = src_tensor->template data<T>();
T* dst_data = dst_tensor->template mutable_data<T>();
int seq_cpy_gap = step_width;
int pad_cpy_gap =
......@@ -113,7 +113,7 @@ class PaddingLoDTensorFunctor<lite::TargetType::kX86, T> {
"'step_width'.");
// fill padding value
T* pad_data = pad_tensor->mutable_data<T>();
T* pad_data = pad_tensor->template mutable_data<T>();
const T* pad_value_data = pad_value.data<T>();
if (pad_value.numel() == 1) {
fast_mem_init<T>(
......
......@@ -30,10 +30,10 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
enum CopyType { kSeqToPad, kPadToSeq };
inline static size_t MaximumSequenceLength(
const std::vector<size_t>& seq_offset) {
size_t seq_num = seq_offset.size() - 1;
size_t max_seq_len = 0;
inline static uint64_t MaximumSequenceLength(
const std::vector<uint64_t>& seq_offset) {
uint64_t seq_num = seq_offset.size() - 1;
uint64_t max_seq_len = 0;
for (size_t i = 0; i < seq_num; ++i) {
max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
}
......@@ -42,7 +42,7 @@ inline static size_t MaximumSequenceLength(
inline static void CheckDims(const lite::DDim& seq_tensor_dims,
const lite::DDim& pad_tensor_dims,
const std::vector<size_t>& seq_offset,
const std::vector<uint64_t>& seq_offset,
int64_t padded_seq_len,
int64_t step_width,
const PadLayout& layout) {
......
......@@ -55,7 +55,7 @@ class MaxSeqPoolFunctor {
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->mutable_data<T>();
T* out_data = output->template mutable_data<T>();
int* max_index = index->mutable_data<int>();
int64_t num_seq = out_dims[0];
......@@ -103,7 +103,7 @@ class MaxSeqPoolFunctor<T, true> {
auto starts = input.lod()[0];
const T* in_data = input.data<T>();
T* out_data = output->mutable_data<T>();
T* out_data = output->template mutable_data<T>();
int64_t num_seq = out_dims[0];
int64_t dim = output->numel() / num_seq;
......@@ -145,7 +145,7 @@ class MaxSeqPoolGradFunctor {
const T* og_data = out_grad.data<T>();
const int* max_index = index.data<int>();
T* ig_data = in_grad->mutable_data<T>();
T* ig_data = in_grad->template mutable_data<T>();
SetConstant<TARGET(kX86), T> set_zero;
set_zero(context, in_grad, static_cast<T>(0.0));
......@@ -170,7 +170,7 @@ class LastSeqPoolFunctor {
lite::Tensor* output) {
// Create pointers to input and output data
auto* in_data = input.data<T>();
auto* out_data = output->mutable_data<T>();
auto* out_data = output->template mutable_data<T>();
// Calculate the size of each item in sequence
int64_t item_size = input.numel() / input.dims()[0];
......@@ -203,7 +203,7 @@ class FirstSeqPoolFunctor {
lite::Tensor* output) {
// Create pointers to input and output data
auto* in_data = input.data<T>();
auto* out_data = output->mutable_data<T>();
auto* out_data = output->template mutable_data<T>();
// Calculate the size of each item in sequence
int64_t item_size = input.numel() / input.dims()[0];
......@@ -238,7 +238,7 @@ class SumSeqPoolGradFunctor {
int64_t in_w = in_grad->numel() / in_grad->dims()[0];
PADDLE_ENFORCE(in_w == out_w);
const T* out_g_data = out_grad.data<T>();
T* in_g_data = in_grad->mutable_data<T>(TARGET(kX86));
T* in_g_data = in_grad->template mutable_data<T>(TARGET(kX86));
auto blas = math::GetBlas<TARGET(kX86), T>(context);
for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
......@@ -288,7 +288,7 @@ class SequencePoolFunctor<TARGET(kX86), T> {
auto lod = input.lod()[0];
if (pooltype == "SUM") {
const T* src = input.data<T>();
T* dst = output->mutable_data<T>(TARGET(kX86));
T* dst = output->template mutable_data<T>(TARGET(kX86));
jit::seq_pool_attr_t attr(
static_cast<int>(input.numel() / input.dims()[0]),
jit::SeqPoolType::kSum);
......
......@@ -101,13 +101,13 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
TEST(SequencePoolingGrad, CPU_SUM) {
paddle::framework::LoD lod1;
lod1.push_back(std::vector<size_t>{0, 10});
lod1.push_back(std::vector<uint64_t>{0, 10});
TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
paddle::platform::CPUPlace,
float>(lod1);
paddle::framework::LoD lod2;
lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
paddle::platform::CPUPlace,
float>(lod2);
......@@ -116,13 +116,13 @@ TEST(SequencePoolingGrad, CPU_SUM) {
#ifdef PADDLE_WITH_CUDA
TEST(SequencePoolingGrad, CUDA_SUM) {
paddle::framework::LoD lod1;
lod1.push_back(std::vector<size_t>{0, 10});
lod1.push_back(std::vector<uint64_t>{0, 10});
TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
paddle::platform::CUDAPlace,
float>(lod1);
paddle::framework::LoD lod2;
lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
paddle::platform::CUDAPlace,
float>(lod2);
......
......@@ -32,7 +32,7 @@ class ScaleLoDTensorFunctor<lite::TargetType::kX86, T> {
size_t seq_width = seq->dims()[1];
lite::LoD abs_offset_lod = lite::fluid::ToAbsOffset(lod);
T* seq_data = seq->mutable_data<T>(lite::TargetType::kX86);
T* seq_data = seq->template mutable_data<T>(lite::TargetType::kX86);
for (size_t i = 0; i < num_seq; ++i) {
for (size_t j = lod[level][i] * seq_width;
j < lod[level][i + 1] * seq_width;
......
......@@ -83,7 +83,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
auto pos_data = pos->mutable_data<int>(lite::TargetType::kX86);
int offset = 0;
std::vector<size_t> vec_out_lod;
std::vector<uint64_t> vec_out_lod;
vec_out_lod.reserve(batch_size + 1);
for (int i = 0; i <= batch_size; ++i) {
offset = row_lod[i];
......@@ -95,7 +95,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
out->set_lod(lod_temp);
auto in_data = in.data<T>();
auto out_data = out->mutable_data<T>(lite::TargetType::kX86);
auto out_data = out->template mutable_data<T>(lite::TargetType::kX86);
T* sum_data = new T[max_k];
for (int i = 0; i < batch_size; ++i) {
......
......@@ -108,8 +108,8 @@ class SoftmaxFunctor<Target, T, is_test, enable_if_CPU<Target>> {
const int num_remain = num_classes / axis_dim;
if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
const T* in_data = X->data<T>();
auto* out_data = Y->mutable_data<T>();
const T* in_data = X->template data<T>();
auto* out_data = Y->template mutable_data<T>();
for (int bs = 0; bs < batch_size; ++bs) {
T max_val = *std::max_element(in_data, in_data + num_classes);
max_val *= static_cast<T>(-1);
......@@ -219,9 +219,9 @@ class SoftmaxGradFunctor<Target, T, enable_if_CPU<Target>> {
const int num_remain = num_classes / axis_dim;
if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
const T* out_data = y->data<T>();
const T* out_grad = y_grad->data<T>();
T* in_grad = x_grad->mutable_data<T>();
const T* out_data = y->template data<T>();
const T* out_grad = y_grad->template data<T>();
T* in_grad = x_grad->template mutable_data<T>();
for (int bs = 0; bs < batch_size; ++bs) {
T scalar;
vec_mul_reduce<T, lite::x86::avx>(
......
......@@ -104,12 +104,12 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
patch_size = processing_list.size();
// T *patch_data =
// patch->mutable_data<T>({static_cast<int64_t>(patch_size),
// patch->template mutable_data<T>({static_cast<int64_t>(patch_size),
// static_cast<int64_t>(patch_elem_size)},
// cpu_place);
patch->Resize({static_cast<int64_t>(patch_size),
static_cast<int64_t>(patch_elem_size)});
auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
auto *patch_data = patch->template mutable_data<T>(lite::TargetType::kX86);
constant(context, patch, 0);
const T *features = node_features.data<T>();
......@@ -166,12 +166,12 @@ class Col2TreeFunctor<lite::TargetType::kX86, T> {
}
}
// T *grad_data =
// in_grad->mutable_data<T>({static_cast<int64_t>(node_count),
// in_grad->template mutable_data<T>({static_cast<int64_t>(node_count),
// static_cast<int64_t>(grad_elem_size)},
// cpu_place);
in_grad->Resize({static_cast<int64_t>(node_count),
static_cast<int64_t>(grad_elem_size)});
auto *grad_data = in_grad->mutable_data<T>(lite::TargetType::kX86);
auto *grad_data = in_grad->template mutable_data<T>(lite::TargetType::kX86);
constant(context, in_grad, 0);
const T *out_g = out_grad.data<T>();
......
......@@ -36,7 +36,7 @@ class Unpool2dMaxFunctor<lite::TargetType::kX86, T> {
int output_feasize = output_height * output_width;
const T* input_data = input.data<T>();
const int* indices_data = indices.data<int>();
T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
for (int b = 0; b < batch_size; ++b) {
for (int c = 0; c < output_channels; ++c) {
for (int i = 0; i < input_feasize; ++i) {
......@@ -70,7 +70,8 @@ class Unpool2dMaxGradFunctor<lite::TargetType::kX86, T> {
int output_feasize = output_height * output_width;
const int* indices_data = indices.data<int>();
const T* output_grad_data = output_grad.data<T>();
T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
T* input_grad_data =
input_grad->template mutable_data<T>(lite::TargetType::kX86);
for (int b = 0; b < batch_size; ++b) {
for (int c = 0; c < output_channels; ++c) {
......
......@@ -75,7 +75,7 @@ class Vol2ColFunctor<lite::TargetType::kX86, T> {
"mismatching.");
const T* vol_data = vol.data<T>();
T* col_data = col->mutable_data<T>();
T* col_data = col->template mutable_data<T>();
for (int c = 0; c < channels_col; ++c) {
int w_offset = c % filter_width;
......@@ -159,7 +159,7 @@ class Col2VolFunctor<lite::TargetType::kX86, T> {
output_width,
"input_width and output_width are "
"mismatching.");
T* vol_data = vol->mutable_data<T>();
T* vol_data = vol->template mutable_data<T>();
const T* col_data = col.data<T>();
for (int c = 0; c < channels_col; ++c) {
......
......@@ -19,7 +19,7 @@
namespace paddle {
namespace lite {
namespace fluid {
using LoD = std::vector<std::vector<size_t>>;
using LoD = std::vector<std::vector<uint64_t>>;
static LoD ToAbsOffset(const LoD &in) {
// the lowest level stores relative offsets
......
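The hunk above changes the LoD alias itself; the `std::vector<size_t>` to `std::vector<uint64_t>` edits elsewhere in this diff follow from it. Below is a minimal sketch of how these nested offset vectors mark sequence boundaries, reusing the values from the beam-search test near the top of the diff; it is illustrative only, not Paddle-Lite API usage:

```cpp
#include <cstdint>
#include <vector>

// Mirrors the alias from the hunk above.
using LoD = std::vector<std::vector<uint64_t>>;

int main() {
  LoD lod;
  lod.push_back({0, 2, 4});        // level 0: two sequences covering rows [0,2) and [2,4)
  lod.push_back({0, 1, 2, 3, 4});  // level 1: four sub-sequences of length 1
  uint64_t first_len = lod[0][1] - lod[0][0];  // length of the first sequence == 2
  return first_len == 2 ? 0 : 1;
}
```

Widening these offsets to a fixed 64-bit type presumably keeps the LoD layout consistent across 32- and 64-bit builds, though the commit message only describes the change as a style fix.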
......@@ -231,8 +231,8 @@ class SoftsignCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::ActivationParam>();
const T* x_data = param.X->data<T>();
T* out_data = param.Out->mutable_data<T>();
const T* x_data = param.X->template data<T>();
T* out_data = param.Out->template mutable_data<T>();
size_t x_size = param.X->numel();
for (size_t i = 0; i < x_size; i++) {
out_data[i] = x_data[i] / (static_cast<T>(1) + std::abs(x_data[i]));
......
......@@ -45,9 +45,9 @@ class AttentionPaddingMaskCompute
auto src_len = static_cast<int64_t>(bottom1->lod()[0][1]);
const int att_batch = bottom0->lod()[0].size() - 1;
const int src_batch = bottom1->lod()[0].size() - 1;
int* pad_begin = _pad_begin->mutable_data<int>();
int* pad_begin = _pad_begin->template mutable_data<int>();
for (int i = 0; i < src_batch; ++i) {
const auto* src_data = bottom1->data<T>() + src_len * i;
const auto* src_data = bottom1->template data<T>() + src_len * i;
int index = src_len - 1;
for (; index >= 0 && _pad_id == static_cast<int>(src_data[index]);
--index) {
......@@ -56,13 +56,14 @@ class AttentionPaddingMaskCompute
}
const auto att_len = static_cast<int64_t>(bottom0->lod()[0][1]);
auto* top_data = top->mutable_data<T>();
auto* top_data = top->template mutable_data<T>();
memcpy(top_data,
bottom0->data<T>(),
bottom0->template data<T>(),
bottom0->dims()[0] * bottom0->dims()[1] * sizeof(T));
for (int i = 0; i < att_batch; ++i) {
for (int j = 0; j < att_len; ++j) {
top_data = top->mutable_data<T>() + src_len * (att_len * i + j);
top_data =
top->template mutable_data<T>() + src_len * (att_len * i + j);
int src_idx = i % src_batch;
for (int k = pad_begin[src_idx]; k < src_len; ++k) {
top_data[k] = _mask;
......
......@@ -59,26 +59,26 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
const int sample_size = x->dims().production() / N / C;
// alloc memory
param.y->mutable_data<T>();
param.y->template mutable_data<T>();
if (!param.is_test) {
param.mean_out->mutable_data<T>();
param.variance_out->mutable_data<T>();
param.saved_mean->mutable_data<T>();
param.saved_variance->mutable_data<T>();
param.mean_out->template mutable_data<T>();
param.variance_out->template mutable_data<T>();
param.saved_mean->template mutable_data<T>();
param.saved_variance->template mutable_data<T>();
}
if (!global_stats) {
// saved_xx is use just in this batch of data
EigenVectorArrayMap<T> saved_mean_e(param.saved_mean->mutable_data<T>(),
C);
EigenVectorArrayMap<T> saved_mean_e(
param.saved_mean->template mutable_data<T>(), C);
EigenVectorArrayMap<T> saved_variance_e(
param.saved_variance->mutable_data<T>(), C);
param.saved_variance->template mutable_data<T>(), C);
saved_mean_e.setZero();
saved_variance_e.setZero();
EigenVectorArrayMap<T> running_mean_arr(param.mean_out->mutable_data<T>(),
C);
EigenVectorArrayMap<T> running_mean_arr(
param.mean_out->template mutable_data<T>(), C);
EigenVectorArrayMap<T> running_var_arr(
param.variance_out->mutable_data<T>(), C);
param.variance_out->template mutable_data<T>(), C);
if ((N * sample_size) == 1) {
LOG(WARNING) << "Only 1 element in normalization dimension, "
......@@ -89,7 +89,8 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
switch (param.data_layout) {
case DATALAYOUT(kNCHW): {
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> x_arr(
x->template data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
saved_mean_e(nc % C) += x_arr.col(nc).sum();
}
......@@ -115,33 +116,37 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// use SavedMean and SavedVariance to do normalize
Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
if (global_stats) {
ConstEigenVectorArrayMap<T> var_arr(param.variance->data<T>(), C);
ConstEigenVectorArrayMap<T> var_arr(param.variance->template data<T>(),
C);
inv_std = (var_arr + param.epsilon).sqrt().inverse();
} else {
EigenVectorArrayMap<T> saved_inv_std(
param.saved_variance->mutable_data<T>(), C);
param.saved_variance->template mutable_data<T>(), C);
// inverse SavedVariance first, gradient will use it too.
saved_inv_std = (saved_inv_std + param.epsilon).inverse().sqrt();
inv_std = saved_inv_std;
}
ConstEigenVectorArrayMap<T> mean_arr(
global_stats ? param.mean->data<T>() : param.saved_mean->data<T>(), C);
global_stats ? param.mean->template data<T>()
: param.saved_mean->template data<T>(),
C);
// ((x - est_mean) * (inv_var) * scale + bias
// formula transform ====>
// (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
ConstEigenVectorArrayMap<T> scale_arr(param.scale->data<T>(), C);
ConstEigenVectorArrayMap<T> bias_arr(param.bias->data<T>(), C);
ConstEigenVectorArrayMap<T> scale_arr(param.scale->template data<T>(), C);
ConstEigenVectorArrayMap<T> bias_arr(param.bias->template data<T>(), C);
Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
bias_arr - mean_arr * inv_std * scale_arr;
switch (param.data_layout) {
case DATALAYOUT(kNCHW): {
EigenArrayMap<T> y_arr(param.y->mutable_data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
EigenArrayMap<T> y_arr(
param.y->template mutable_data<T>(), sample_size, N * C);
ConstEigenArrayMap<T> x_arr(x->template data<T>(), sample_size, N * C);
for (int nc = 0; nc < N * C; ++nc) {
y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
}
......
......@@ -47,7 +47,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int64_t axis = static_cast<int64_t>(param.axis);
auto* axis_tensor = param.axis_tensor;
if (axis_tensor != nullptr) {
auto* axis_tensor_data = axis_tensor->data<int>();
auto* axis_tensor_data = axis_tensor->template data<int>();
axis = static_cast<int64_t>(axis_tensor_data[0]);
}
......@@ -60,7 +60,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int concat_input_size = count(axis + 1, x_dims.size(), x_dims);
const int top_concat_axis = out->dims()[axis];
for (size_t i = 0; i < param.x.size(); ++i) {
const T* bottom_data = param.x[i]->data<T>();
const T* bottom_data = param.x[i]->template data<T>();
const int64_t bottom_concat_axis = param.x[i]->dims()[axis];
for (int n = 0; n < num_concat; ++n) {
std::memcpy(
......
......@@ -52,7 +52,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& context = ctx_->As<X86Context>();
auto& param = *param_.get_mutable<operators::ConvParam>();
lite::Tensor filter = *param.filter;
param.output->mutable_data<T>();
param.output->template mutable_data<T>();
const int batch_size = static_cast<int>(param.x->dims()[0]);
std::vector<int64_t> filter_shape_vec(filter.dims().Vectorize());
......@@ -95,9 +95,9 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto blas =
paddle::lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
for (int i = 0; i < batch_size; i++) {
lite::Tensor in_batch = param.x->Slice<T>(i, i + 1);
lite::Tensor in_batch = param.x->template Slice<T>(i, i + 1);
in_batch.Resize(input_shape);
lite::Tensor out_batch = param.output->Slice<T>(i, i + 1);
lite::Tensor out_batch = param.output->template Slice<T>(i, i + 1);
out_batch.Resize(output_matrix_shape);
for (int g = 0; g < param.groups; g++) {
lite::Tensor in_slice =
......
......@@ -38,10 +38,10 @@ class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
using param_t = operators::DropoutParam;
void Run() override {
auto& param = *param_.get_mutable<operators::DropoutParam>();
const auto* x_data = param.x->data<T>();
auto* out_data = param.output->mutable_data<T>();
const auto* x_data = param.x->template data<T>();
auto* out_data = param.output->template mutable_data<T>();
if (!param.is_test) {
auto* mask_data = param.mask->mutable_data<T>();
auto* mask_data = param.mask->template mutable_data<T>();
std::random_device rnd;
std::minstd_rand engine;
int seed = param.fix_seed ? param.seed : rnd();
......
......@@ -248,8 +248,8 @@ class TransformFunctor {
lite::Tensor *z,
const lite::Context<Target> &ctx,
Functor func)
: x_(x->data<T>()),
y_(y->data<T>()),
: x_(x->template data<T>()),
y_(y->template data<T>()),
z_(z->mutable_data<OutType>()),
nx_(x->numel()),
ctx_(ctx),
......@@ -483,9 +483,10 @@ void FusedElemwiseAndActComputeNoBroadcast(const lite::Context<Target> &ctx,
x.data<T>(),
y.data<T>(),
compound_functor,
out->mutable_data<T>(),
intermediate_out == nullptr ? nullptr
: intermediate_out->mutable_data<T>()});
out->template mutable_data<T>(),
intermediate_out == nullptr
? nullptr
: intermediate_out->template mutable_data<T>()});
}
template <lite::TargetType Target,
......@@ -523,9 +524,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
compound_functor,
h,
w,
out->mutable_data<T>(),
intermediate_out == nullptr ? nullptr
: intermediate_out->mutable_data<T>());
out->template mutable_data<T>(),
intermediate_out == nullptr
? nullptr
: intermediate_out->template mutable_data<T>());
} else {
FusedElemwiseAndActBroadcast2CPU<T,
......@@ -539,9 +541,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
n,
post,
compound_functor,
out->mutable_data<T>(),
intermediate_out == nullptr ? nullptr
: intermediate_out->mutable_data<T>());
out->template mutable_data<T>(),
intermediate_out == nullptr
? nullptr
: intermediate_out->template mutable_data<T>());
}
}
......
......@@ -140,9 +140,9 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int M = output->dims().production() / w_dims1;
const T* input_data = input->data<T>();
const T* w_data = w->data<T>();
T* output_data = output->mutable_data<T>();
const T* input_data = input->template data<T>();
const T* w_data = w->template data<T>();
T* output_data = output->template mutable_data<T>();
auto& context = ctx_->As<X86Context>();
FCFunctor<lite::TargetType::kX86, T> fc;
......@@ -153,7 +153,7 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
input_data,
w_data,
output_data,
bias ? bias->data<T>() : NULL,
bias ? bias->template data<T>() : NULL,
with_relu,
padding_weights);
}
......
......@@ -42,9 +42,9 @@ class FillConstantBatchSizeLikeCompute
int output_dim_idx = param.output_dim_idx;
odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->Resize(odims);
// out->mutable_data<T>();
// out->template mutable_data<T>();
}
out->mutable_data<T>();
out->template mutable_data<T>();
auto value = param.value;
paddle::lite::x86::math::SetConstant<lite::TargetType::kX86, T> setter;
......
......@@ -50,9 +50,9 @@ void CPUGather(const lite::Tensor* src,
auto src_dims = src->dims();
const T* p_src = src->data<T>();
const T* p_src = src->template data<T>();
const IndexT* p_index = index->data<IndexT>();
T* p_output = output->mutable_data<T>();
T* p_output = output->template mutable_data<T>();
// slice size
int slice_size = 1;
......@@ -77,7 +77,7 @@ class GatherCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto index = param.Index;
auto out = param.Out;
out->mutable_data<T>();
out->template mutable_data<T>();
if (x->dims().production() == 0) return;
/*
* Since there's no type defined for lite::Tensor in Paddle-Lite, then
......
......@@ -44,7 +44,7 @@ inline void ReorderInitState(const lite::Context<TARGET(kX86)>& context,
bool indexed_src) {
lite::x86::math::CopyMatrixRowsFunctor<TARGET(kX86), T> row_shuffle;
dst->Resize(src.dims());
dst->mutable_data<T>();
dst->template mutable_data<T>();
row_shuffle(context, src, index_lod, dst, indexed_src);
}
......@@ -65,18 +65,19 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto* input = param.input;
auto* h0 = param.h0;
auto* weight = param.weight;
const T* weight_data = weight->data<T>();
const T* weight_data = weight->template data<T>();
auto* bias = param.bias;
auto* batch_gate = param.batch_gate;
auto* batch_reset_hidden_prev = param.batch_reset_hidden_prev;
auto* batch_hidden = param.batch_hidden;
T* batch_gate_ptr = batch_gate->mutable_data<T>();
T* batch_reset_hidden_prev_ptr = batch_reset_hidden_prev->mutable_data<T>();
T* batch_hidden_ptr = batch_hidden->mutable_data<T>();
T* batch_gate_ptr = batch_gate->template mutable_data<T>();
T* batch_reset_hidden_prev_ptr =
batch_reset_hidden_prev->template mutable_data<T>();
T* batch_hidden_ptr = batch_hidden->template mutable_data<T>();
auto* hidden = param.hidden;
hidden->mutable_data<T>();
hidden->template mutable_data<T>();
const auto& hidden_dims = hidden->dims();
......@@ -99,7 +100,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs
// to reorder.
const std::vector<size_t>& order(batch_gate->lod()[2]);
const std::vector<uint64_t>& order(batch_gate->lod()[2]);
ReorderInitState<T>(context, *h0, order, &ordered_h0, true);
gru_value.prev_out_value = ordered_h0.mutable_data<T>();
} else {
......
......@@ -47,9 +47,9 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto x_dims = x->dims();
y->mutable_data<T>();
Mean->mutable_data<T>();
Var->mutable_data<T>();
y->template mutable_data<T>();
Mean->template mutable_data<T>();
Var->template mutable_data<T>();
auto matrix_dim = x_dims.Flatten2D(begin_norm_axis);
int left = static_cast<int>(matrix_dim[0]);
......@@ -73,10 +73,10 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
.At(right);
ker(in.mutable_data<T>(),
out.mutable_data<T>(),
Mean->mutable_data<T>(),
Var->mutable_data<T>(),
Scale->data<T>(),
Bias->data<T>(),
Mean->template mutable_data<T>(),
Var->template mutable_data<T>(),
Scale->template data<T>(),
Bias->template data<T>(),
static_cast<int>(left),
epsilon,
right);
......
......@@ -33,15 +33,15 @@ class LookupTableCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto *ids_t = param.Ids;
auto *output_t = param.Out;
int64_t padding_idx = param.padding_idx;
const int64_t *ids = ids_t->data<int64_t>();
const int64_t *ids = ids_t->template data<int64_t>();
int64_t ids_numel = ids_t->dims().production();
auto *table_t = param.W;
int64_t row_number = table_t->dims()[0];
int64_t row_width = table_t->dims()[1];
const T *table = table_t->data<T>();
T *output = output_t->mutable_data<T>();
const T *table = table_t->template data<T>();
T *output = output_t->template mutable_data<T>();
memset(output, 0, output_t->dims().production() * sizeof(T));
for (int64_t i = 0; i < ids_numel; ++i) {
if (padding_idx != -1 && ids[i] == padding_idx) {
......
......@@ -35,7 +35,7 @@ void MatchMatrixTensorCompute<T>::Run() {
const auto& offset_l = x->lod()[0];
const auto& offset_r = y->lod()[0];
std::vector<size_t> top_offset;
std::vector<uint64_t> top_offset;
int top_size = 0;
top_offset.push_back(top_size);
for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
......@@ -97,9 +97,9 @@ void MatchMatrixTensorCompute<T>::Run() {
int batch_size = x->lod()[0].size() - 1;
int lod_lv1_size = batch_size * dim_t;
int lod_lv2_size = x->lod()[0].back() * dim_t;
std::vector<size_t> out_lod0(batch_size + 1, 0);
std::vector<size_t> out_lod1(lod_lv1_size + 1, 0);
std::vector<size_t> out_lod2(lod_lv2_size + 1, 0);
std::vector<uint64_t> out_lod0(batch_size + 1, 0);
std::vector<uint64_t> out_lod1(lod_lv1_size + 1, 0);
std::vector<uint64_t> out_lod2(lod_lv2_size + 1, 0);
for (int i = 0; i < batch_size; i++) {
out_lod0[i + 1] = out_lod0[i] + dim_t;
int len_l = offset_l[i + 1] - offset_l[i];
......
......@@ -56,7 +56,7 @@ class MatMulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto *x = param.X;
auto *y = param.Y;
auto *out = param.Out;
out->mutable_data<T>();
out->template mutable_data<T>();
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
auto mat_dim_a = lite::x86::math::CreateMatrixDescriptor(
......
......@@ -64,7 +64,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
y_matrix = *y;
}
z->mutable_data<T>();
z->template mutable_data<T>();
auto z_dim = z->dims();
if (z_dim.size() != 2) {
z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
......
......@@ -49,7 +49,7 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
bool reduce_all = param.reduce_all;
auto* input = param.x;
auto* output = param.output;
param.output->mutable_data<T>();
param.output->template mutable_data<T>();
const auto& dims = param.dim;
bool keep_dim = param.keep_dim;
......
......@@ -41,8 +41,8 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override {
auto& param = *param_.get_mutable<param_t>();
scale_compute(param.x->data<T>(),
param.output->mutable_data<T>(),
scale_compute(param.x->template data<T>(),
param.output->template mutable_data<T>(),
param.x->dims().production(),
param.scale,
param.bias,
......
......@@ -84,7 +84,7 @@ void SearchGrnnCompute<T>::PrepareLayout(const Tensor* input_blob) {
int max_width = width_data[idx_sorted_by_width_data[0]];
// start of reorganizing the input
std::vector<size_t> new_offset;
std::vector<uint64_t> new_offset;
new_offset.resize(max_width + 1);
new_offset[0] = 0;
......
......@@ -50,7 +50,7 @@ class SearchGroupPaddingCompute
}
}
std::vector<size_t> new_offset;
std::vector<uint64_t> new_offset;
new_offset.resize(batch + 1);
for (int i = 0; i < batch + 1; ++i) {
new_offset[i] = i * max_seq;
......@@ -67,7 +67,7 @@ class SearchGroupPaddingCompute
top1_lod.push_back(offset);
top1->set_lod(top1_lod);
top1->Resize({dim0, 1});
memset(top1->mutable_data<T>(),
memset(top1->template mutable_data<T>(),
0,
top1->dims()[0] * top1->dims()[1] * sizeof(T));
// for padding input id
......@@ -76,9 +76,9 @@ class SearchGroupPaddingCompute
top2->set_lod(top2_lod);
top2->Resize({batch * max_seq, 1});
// copy data
const auto* bottom_data = bottom0->data<T>();
auto* top_data = top0->mutable_data<T>();
auto* top_padding_input_data = top2->mutable_data<T>();
const auto* bottom_data = bottom0->template data<T>();
auto* top_data = top0->template mutable_data<T>();
auto* top_padding_input_data = top2->template mutable_data<T>();
for (int i = 0; i < batch; i++) {
const int copy_step = offset[i + 1] - offset[i];
const int start = i * max_seq;
......
......@@ -58,8 +58,10 @@ class SearchSeqFcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int M = x_dims[0];
int N = w_dims[0];
for (int i = 0; i < M; i++) {
blas.AXPY(
N, static_cast<T>(1), b->data<T>(), out->mutable_data<T>() + i * N);
blas.AXPY(N,
static_cast<T>(1),
b->template data<T>(),
out->template mutable_data<T>() + i * N);
}
}
}
......
......@@ -39,9 +39,9 @@ class SequenceArithmeticCompute
out->Resize(x->dims());
out->set_lod(x->lod());
auto x_data = x->data<T>();
auto y_data = y->data<T>();
auto out_data = out->mutable_data<T>();
auto x_data = x->template data<T>();
auto y_data = y->template data<T>();
auto out_data = out->template mutable_data<T>();
auto x_seq_offset = x->lod()[0];
auto y_seq_offset = y->lod()[0];
int seq_num = x_seq_offset.size() - 1;
......
......@@ -25,7 +25,7 @@ namespace x86 {
template <typename T>
inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
std::vector<lite::Tensor>* xs_in_order) {
std::vector<size_t> result;
std::vector<uint64_t> result;
result.resize(xs[0]->lod()[0].size());
for (size_t i = 1; i < result.size(); ++i) {
......@@ -75,7 +75,7 @@ class SequenceConcatCompute
out_dims[0] = batch_size;
param.Out->Resize(out_dims);
T* dout = param.Out->mutable_data<T>();
T* dout = param.Out->template mutable_data<T>();
std::vector<lite::Tensor> x_in_order;
param.Out->set_lod(ConcatLoD<T>(param.X, &x_in_order));
......
......@@ -26,7 +26,7 @@ namespace x86 {
namespace {
inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
std::vector<lite::Tensor>* xs_in_order) {
std::vector<size_t> result;
std::vector<uint64_t> result;
result.resize(xs[0]->lod()[0].size());
for (size_t i = 1; i < result.size(); ++i) {
......
......@@ -29,9 +29,10 @@ using Tensor = lite::Tensor;
template <typename T>
struct SequenceExpandFunctor {
void operator()(const Tensor &x,
const std::vector<size_t> &ref_lod, /*expand referenced lod*/
Tensor *out) {
void operator()(
const Tensor &x,
const std::vector<uint64_t> &ref_lod, /*expand referenced lod*/
Tensor *out) {
int64_t hight = x.dims()[0];
int64_t width = x.data_size() / hight;
......@@ -39,13 +40,13 @@ struct SequenceExpandFunctor {
T *out_data = out->mutable_data<T, T>();
for (int h_id = 0; h_id < hight; ++h_id) {
size_t span = ref_lod[h_id + 1] - ref_lod[h_id];
uint64_t span = ref_lod[h_id + 1] - ref_lod[h_id];
if (span == 0) continue;
const T *src = in_data + h_id * width;
for (int64_t w_id = 0; w_id < width; ++w_id) {
for (uint64_t w_id = 0; w_id < width; ++w_id) {
T ele = src[w_id];
size_t offset = ref_lod[h_id] * width;
for (size_t k = 0; k < span; ++k) {
for (uint64_t k = 0; k < span; ++k) {
out_data[offset + k * width + w_id] = ele;
}
}
......@@ -68,7 +69,7 @@ class SequenceExpandAsCompute
CHECK_EQ(y_lod.size(), 1);
CHECK_GT(y_lod[0].size(), 1);
out->mutable_data<T, T>();
out->template mutable_data<T, T>();
SequenceExpandFunctor<T> seq_espand_functor;
seq_espand_functor(*x, y_lod[0], out);
......
......@@ -40,7 +40,7 @@ class SequencePoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
dims[0] = lod[0].size() - 1;
out->Resize({dims});
out->mutable_data<T>();
out->template mutable_data<T>();
lite::Tensor* index = nullptr;
const bool is_test = true;
......
......@@ -64,9 +64,9 @@ class SequenceReshapeCompute
out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()),
out_width});
auto* dst_ptr = out->mutable_data<T>();
auto* dst_ptr = out->template mutable_data<T>();
auto size = in->numel() * sizeof(T);
std::memcpy(dst_ptr, in->data<T>(), size);
std::memcpy(dst_ptr, in->template data<T>(), size);
}
virtual ~SequenceReshapeCompute() = default;
......
......@@ -29,7 +29,7 @@ class ShapeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override {
auto& param = *param_.get_mutable<operators::ShapeParam>();
// auto& context = context_->As<X86Context>();
auto out_data = param.Out->mutable_data<int32_t>();
auto out_data = param.Out->template mutable_data<int32_t>();
auto in_dims = param.X->dims();
for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = in_dims[i];
......
......@@ -58,7 +58,7 @@ class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto* x = param.x;
auto* output = param.output;
output->mutable_data<T>();
output->template mutable_data<T>();
const int rank = x->dims().size();
const int axis = CanonicalAxis(param.axis, rank);
......
......@@ -35,8 +35,8 @@ class SqueezeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto x = param.X;
auto output = param.Out;
auto x_dims = x->dims();
auto* x_data = x->data<T>();
auto* out_data = output->mutable_data<T>();
auto* x_data = x->template data<T>();
auto* out_data = output->template mutable_data<T>();
memcpy(out_data, x_data, x_dims.production() * sizeof(T));
}
......@@ -54,9 +54,9 @@ class Squeeze2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto output = param.Out;
auto xshape = param.XShape;
auto x_dims = x->dims();
auto* x_data = x->data<T>();
auto* out_data = output->mutable_data<T>();
auto* xshape_data = xshape->mutable_data<T>();
auto* x_data = x->template data<T>();
auto* out_data = output->template mutable_data<T>();
auto* xshape_data = xshape->template mutable_data<T>();
memcpy(out_data, x_data, x_dims.production() * sizeof(T));
memcpy(xshape_data, x_data, x_dims.production() * sizeof(T));
}
......
......@@ -40,9 +40,9 @@ class StackCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
if (axis < 0) axis += (x[0]->dims().size() + 1);
int n = static_cast<int>(x.size());
auto y_data = y->mutable_data<T>();
auto y_data = y->template mutable_data<T>();
std::vector<const T*> x_datas(n);
for (int i = 0; i < n; ++i) x_datas[i] = x[i]->data<T>();
for (int i = 0; i < n; ++i) x_datas[i] = x[i]->template data<T>();
int pre = 1, post = 1;
auto dim = x[0]->dims();
......
......@@ -73,7 +73,7 @@ class TransposeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>();
auto* x = param.x;
auto* out = param.output;
out->mutable_data<T>();
out->template mutable_data<T>();
int ndims = param.axis.size();
auto& context = ctx_->As<X86Context>();
TransCompute<lite::TargetType::kX86, T>(
......@@ -92,7 +92,7 @@ class Transpose2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>();
auto* x = param.x;
auto* out = param.output;
out->mutable_data<T>();
out->template mutable_data<T>();
int ndims = param.axis.size();
auto& context = ctx_->As<X86Context>();
TransCompute<lite::TargetType::kX86, T>(
......
......@@ -34,8 +34,8 @@ class UniformRandomCompute
auto *param_out = &param.Out->raw_tensor();
T *data =
param_out->mutable_data<T>(context.x86_device_context()->GetPlace());
T *data = param_out->template mutable_data<T>(
context.x86_device_context()->GetPlace());
unsigned int seed = static_cast<unsigned int>(param.seed);
std::minstd_rand engine;
......
......@@ -80,7 +80,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
std::vector<int64_t> col_dims_vec{top_size};
col_dims_vec.push_back(1);
col->Resize(col_dims_vec);
auto* top_data = col->mutable_data<T>();
auto* top_data = col->template mutable_data<T>();
const auto* bottom_data = input.data<T>();
int kernel_win_size = kernel_h * kernel_w;
......@@ -149,7 +149,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// const auto& offset_y = in_row->lod()[0];
const auto& offset_y = param.X->lod()[1];
const auto& offset_x = param.X->lod()[2];
std::vector<size_t> top_offset;
std::vector<uint64_t> top_offset;
int top_size = 0;
top_offset.push_back(top_size);
for (int b = 0; b < batch; ++b) {
......@@ -178,9 +178,9 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
std::vector<int64_t> top_dims_vec{top_size};
top_dims_vec.push_back(1);
top->Resize(top_dims_vec);
auto* top_data = top->mutable_data<T>();
const auto* w_data = w->data<T>();
const auto* col_data = col->data<T>();
auto* top_data = top->template mutable_data<T>();
const auto* w_data = w->template data<T>();
const auto* col_data = col->template data<T>();
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
for (int b = 0; b < batch; ++b) {
......
......@@ -140,7 +140,7 @@ static void var_conv_2d_ref(const lite::Tensor* bottom,
const auto& col_offset = col->lod()[0];
const auto& offset_x = in_col->lod()[0];
const auto& offset_y = in_row->lod()[0];
std::vector<size_t> top_offset;
std::vector<uint64_t> top_offset;
int top_size = 0;
top_offset.push_back(top_size);
for (int b = 0; b < batch; ++b) {
......