Unverified commit 77734ce7, authored by huzhiqiang, committed by GitHub

[x86] Fix x86 code style (#3287)

Parent: 720590c9
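Note: most of the changes below add the C++ template keyword to calls of member function templates inside templated kernel code (e.g. tensor->template mutable_data<T>()), which is required whenever the object's type depends on a template parameter, and they switch LoD offset containers from std::vector<size_t> to std::vector<uint64_t>, which fixes the offset width at 64 bits regardless of platform. The following is a minimal standalone sketch of why the disambiguator is needed; FakeTensor and FillFirst are illustrative names, not code from this repository.

#include <cstdio>

// Illustrative sketch only; FakeTensor stands in for a tensor-like class
// that exposes a member function template.
struct FakeTensor {
  template <typename T>
  T* mutable_data() {
    return reinterpret_cast<T*>(storage);
  }
  alignas(8) unsigned char storage[64] = {};
};

// Inside this function template the type of 't' depends on TensorT, so
// 'mutable_data' is a dependent name: without the 'template' keyword the
// '<' would be parsed as a less-than operator and the call would fail to
// compile on conforming compilers.
template <typename T, typename TensorT>
void FillFirst(TensorT* t, T value) {
  T* p = t->template mutable_data<T>();  // 'template' disambiguator required
  p[0] = value;
}

int main() {
  FakeTensor t;
  FillFirst(&t, 3.14f);  // instantiates FillFirst<float, FakeTensor>
  std::printf("%f\n", static_cast<double>(t.mutable_data<float>()[0]));
  return 0;
}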
@@ -10,6 +10,7 @@ if (LITE_ON_TINY_PUBLISH)
 endif()
 set(light_lib_DEPS light_api paddle_api paddle_api_light)
 if ((NOT LITE_ON_TINY_PUBLISH) AND (LITE_WITH_CUDA OR LITE_WITH_X86 OR LITE_WITH_BM OR ARM_TARGET_OS STREQUAL "android" OR ARM_TARGET_OS STREQUAL "armlinux"))
 #full api dynamic library
 lite_cc_library(paddle_full_api_shared SHARED SRCS paddle_api.cc light_api.cc cxx_api.cc cxx_api_impl.cc light_api_impl.cc
@@ -264,8 +265,6 @@ if (NOT LITE_ON_TINY_PUBLISH)
 NPU_DEPS ${npu_kernels}
 CL_DEPS ${opencl_kernels}
 FPGA_DEPS ${fpga_kernels}
-CV_DEPS paddle_cv_arm
-NPU_DEPS ${npu_kernels}
 BM_DEPS ${bm_kernels})
 # The final inference library for just MobileConfig.
 bundle_static_library(paddle_api_full paddle_api_full_bundled bundle_full_api)
......
@@ -96,8 +96,8 @@ class BeamSearchFunctor<TARGET(kX86), T> {
 // : nullptr;
 // fill in data
-std::vector<size_t> low_level;
-size_t low_offset = 0;
+std::vector<uint64_t> low_level;
+uint64_t low_offset = 0;
 for (auto &items : selected_items) {
 low_level.push_back(low_offset);
 for (auto &item : items) {
......
@@ -22,8 +22,8 @@ void PrepareCPUTensors(paddle::framework::LoDTensor* ids,
 paddle::framework::LoDTensor* pre_scores) {
 // lod
 paddle::framework::LoD lod;
-std::vector<size_t> level0({0, 2, 4});
-std::vector<size_t> level1({0, 1, 2, 3, 4});
+std::vector<uint64_t> level0({0, 2, 4});
+std::vector<uint64_t> level1({0, 1, 2, 3, 4});
 lod.push_back(level0);
 lod.push_back(level1);
 ids->set_lod(lod);
......
@@ -483,7 +483,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
 mat_a.data<T>(),
 mat_b.data<T>(),
 beta,
-mat_out->mutable_data<T>());
+mat_out->template mutable_data<T>());
 }
 template <>
@@ -759,7 +759,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
 mat_a.data<T>(),
 mat_b.data<T>(),
 beta,
-mat_out->mutable_data<T>());
+mat_out->template mutable_data<T>());
 } else {
 PADDLE_ENFORCE(dim_a.batch_size_ == dim_b.batch_size_ ||
 dim_a.batch_size_ == 0 || dim_b.batch_size_ == 0);
@@ -773,7 +773,7 @@ void Blas<Target>::MatMul(const lite::Tensor &mat_a,
 mat_a.data<T>(),
 mat_b.data<T>(),
 beta,
-mat_out->mutable_data<T>(),
+mat_out->template mutable_data<T>(),
 dim_a.batch_size_ == 0 ? dim_b.batch_size_ : dim_a.batch_size_,
 dim_a.stride_,
 dim_b.stride_);
......
@@ -51,7 +51,7 @@ class ConcatFunctor<lite::TargetType::kX86, T> {
 // auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());
 // computation
-auto output_data = output->mutable_data<T>();
+auto output_data = output->template mutable_data<T>();
 int col_idx = 0;
 for (int j = 0; j < num; ++j) {
 int col_len = input_cols[j];
@@ -108,7 +108,7 @@ class SplitFunctor<lite::TargetType::kX86, T> {
 int col_len = output_cols[j];
 auto* out_tensor = outputs->at(j);
 if (out_tensor != nullptr) {
-T* dst_ptr = out_tensor->mutable_data<T>() + k * col_len;
+T* dst_ptr = out_tensor->template mutable_data<T>() + k * col_len;
 std::copy_n(src_ptr + col_idx, col_len, dst_ptr);
 // memory::Copy(cpu_place, dst_ptr, cpu_place, src_ptr + col_idx,
 // sizeof(T) * col_len);
......
@@ -50,8 +50,8 @@ class CrossEntropyFunctor<lite::TargetType::kX86, T> {
 .reshape(batch_axis_remain)
 .sum(Eigen::DSizes<int, 1>(1)));
 } else {
-const T* prob_data = prob->data<T>();
-T* loss_data = out->mutable_data<T>();
+const T* prob_data = prob->template data<T>();
+T* loss_data = out->template mutable_data<T>();
 const int64_t* label_data = labels->data<int64_t>();
 for (int i = 0; i < batch_size; ++i) {
......
@@ -99,7 +99,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kCFO,
 int channels_col = im_channels * filter_height * filter_width;
-T* im_data = im->mutable_data<T>();
+T* im_data = im->template mutable_data<T>();
 const T* col_data = col.data<T>();
 for (int c = 0; c < channels_col; ++c) {
@@ -161,7 +161,7 @@ class Im2ColFunctor<lite::x86::math::ColFormat::kOCF,
 int col_width = col->dims()[1];
 const T* im_data = im.data<T>();
-T* col_data = col->mutable_data<T>();
+T* col_data = col->template mutable_data<T>();
 for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
 for (int col_col_idx = 0; col_col_idx < col_width; ++col_col_idx) {
@@ -235,7 +235,7 @@ class Col2ImFunctor<lite::x86::math::ColFormat::kOCF,
 "col_width and padding(padding_left, padding_right) are "
 "inconsistent.");
-T* im_data = im->mutable_data<T>();
+T* im_data = im->template mutable_data<T>();
 const T* col_data = col.data<T>();
 for (int col_row_idx = 0; col_row_idx < col_height; ++col_row_idx) {
......
@@ -42,7 +42,7 @@ inline void im2col_common(const lite::Tensor& im,
 int channels_col = im_channels * filter_height * filter_width;
 const T* im_data = im.data<T>();
-T* col_data = col->mutable_data<T>();
+T* col_data = col->template mutable_data<T>();
 for (int c = 0; c < channels_col; ++c) {
 int w_offset = c % filter_width;
 int h_offset = (c / filter_width) % filter_height;
@@ -77,7 +77,7 @@ inline void im2col_sh1sw1dh1dw1ph0pw0(const lite::Tensor& im,
 int output_width = col->dims()[4];
 const T* im_data = im.data<T>();
-T* col_data = col->mutable_data<T>();
+T* col_data = col->template mutable_data<T>();
 int col_matrix_width = output_width * output_height;
 int im_size = im_height * im_width;
 size_t copy_size = sizeof(T) * output_width;
@@ -123,7 +123,7 @@ inline void im2col_sh1sw1dh1dw1ph1pw1(const lite::Tensor& im,
 constexpr int prw = 1;
 const T* im_data = im.data<T>();
-T* col_data = col->mutable_data<T>();
+T* col_data = col->template mutable_data<T>();
 int im_size = im_height * im_width;
 int col_matrix_width = output_width * output_height;
 int col_block_fh = filter_width * col_matrix_width;  // fw*oh*ow
......
@@ -65,7 +65,7 @@ struct TensorSetConstantCPU {
 : tensor_(tensor), value_(value) {}
 template <typename T>
 void apply() const {
-auto* begin = tensor_->mutable_data<T>(lite::TargetType::kX86);
+auto* begin = tensor_->template mutable_data<T>(lite::TargetType::kX86);
 std::fill(begin, begin + tensor_->numel(), static_cast<T>(value_));
 }
 lite::Tensor* tensor_;
@@ -126,7 +126,7 @@ struct RowwiseAdd<lite::TargetType::kX86, T> {
 const T* input_data = input.data<T>();
 const T* vector_data = vector.data<T>();
-T* output_data = output->mutable_data<T>();
+T* output_data = output->template mutable_data<T>();
 for (int64_t i = 0; i < in_dims[0]; ++i) {
 for (int64_t j = 0; j < size; ++j) {
 output_data[i * in_dims[0] + j] =
......
@@ -83,7 +83,7 @@ class ColwiseSum<lite::TargetType::kX86, T> {
 auto size = in_dims[1];
 PADDLE_ENFORCE_EQ(out->numel(), size);
-T* out_buf = out->mutable_data<T>(out->target());
+T* out_buf = out->template mutable_data<T>(out->target());
 const T* in_buf = input.data<T>();
 for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -129,7 +129,7 @@ class RowwiseMean<lite::TargetType::kX86, T> {
 auto size = in_dims[1];
 PADDLE_ENFORCE_EQ(out->numel(), height);
 auto inv_size = 1.0 / size;
-T* out_buf = out->mutable_data<T>(out->target());
+T* out_buf = out->template mutable_data<T>(out->target());
 const T* in_buf = input.data<T>();
 for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
@@ -173,7 +173,7 @@ class RowwiseSum<lite::TargetType::kX86, T> {
 auto size = in_dims[1];
 PADDLE_ENFORCE_EQ(out->numel(), height);
-T* out_buf = out->mutable_data<T>(out->target());
+T* out_buf = out->template mutable_data<T>(out->target());
 const T* in_buf = input.data<T>();
 for (size_t i = 0; i < static_cast<size_t>(height); ++i) {
......
@@ -35,7 +35,7 @@ class MaxOutFunctor<lite::TargetType::kX86, T> {
 // c_size means the output size of each sample
 int c_size = fea_size * output_channels;
 const T* input_data = input.data<T>();
-T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
 for (int i = 0; i < batch_size; ++i) {
 int new_bindex = c_size * i;
@@ -72,7 +72,8 @@ class MaxOutGradFunctor<lite::TargetType::kX86, T> {
 const T* input_data = input.data<T>();
 const T* output_data = output.data<T>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 for (int i = 0; i < batch_size; ++i) {
 int blen = fea_size * output_channels * i;
......
@@ -54,8 +54,8 @@ class Pool2dFunctor<lite::TargetType::kX86, PoolProcess, T> {
 const int input_stride = input_height * input_width;
 const int output_stride = output_height * output_width;
-const T* input_data = input->data<T>();
-T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+const T* input_data = input->template data<T>();
+T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
 int hstart, hend;
 int wstart, wend;
@@ -137,7 +137,8 @@ class Pool2dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
 const T* input_data = input.data<T>();
 const T* output_data = output.data<T>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 int hstart, hend;
 int wstart, wend;
@@ -220,7 +221,8 @@ class MaxPool2dGradFunctor<lite::TargetType::kX86, T> {
 const T* input_data = input.data<T>();
 const T* output_data = output.data<T>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 for (int i = 0; i < batch_size; i++) {
 for (int c = 0; c < output_channels; ++c) {
@@ -322,7 +324,7 @@ class Pool3dFunctor<lite::TargetType::kX86, PoolProcess, T> {
 const int output_stride = output_depth * output_height * output_width;
 const T* input_data = input.data<T>();
-T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
 int dstart, dend;
 int hstart, hend;
@@ -425,7 +427,8 @@ class Pool3dGradFunctor<lite::TargetType::kX86, PoolProcess, T> {
 const T* input_data = input.data<T>();
 const T* output_data = output.data<T>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 int dstart, dend;
 int hstart, hend;
@@ -530,7 +533,8 @@ class MaxPool3dGradFunctor<lite::TargetType::kX86, T> {
 const T* input_data = input.data<T>();
 const T* output_data = output.data<T>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 for (int i = 0; i < batch_size; i++) {
 for (int c = 0; c < output_channels; ++c) {
......
@@ -58,11 +58,11 @@ class SampleWithProb {
 const int64_t* label_data = L->data<int64_t>();
 // int64_t* samples_data =
 // S->mutable_data<int64_t>(ret_dim, Target);
-// T* probabilities_data = P->mutable_data<T>(ret_dim, Target);
+// T* probabilities_data = P->template mutable_data<T>(ret_dim, Target);
 S->Resize({batch_size, num_sampled_classes});
 auto* samples_data = S->mutable_data<int64_t>(Target);
 P->Resize({batch_size, num_sampled_classes});
-auto* probabilities_data = P->mutable_data<T>(Target);
+auto* probabilities_data = P->template mutable_data<T>(Target);
 // temp sets for unique sampling
 std::unordered_set<int64_t> tmp_samples;
......
@@ -42,7 +42,7 @@ class SearchFcFunctor<lite::TargetType::kX86, T> {
 lite::DDim dims(std::vector<int64_t>({bottom.dims()[0], out_size}));
 const auto bottom_data = bottom.data<T>();
-auto top_data = top->mutable_data<T>(lite::TargetType::kX86);
+auto top_data = top->template mutable_data<T>(lite::TargetType::kX86);
 const auto weights = w.data<T>();
 auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
 call_gemm<lite::X86Context, T>(blas,
......
@@ -52,7 +52,7 @@ struct SelectedRowsAdd<lite::TargetType::kX86, T> {
 PADDLE_ENFORCE_EQ(in1_row_numel, in2_value.numel() / in2_rows.size());
 PADDLE_ENFORCE_EQ(in1_row_numel, out_value->numel() / out_rows.size());
-auto* out_data = out_value->mutable_data<T>();
+auto* out_data = out_value->template mutable_data<T>();
 auto* in1_data = in1_value.data<T>();
 std::copy_n(in1_data, in1_value.numel(), out_data);
@@ -87,7 +87,7 @@ struct SelectedRowsAddTensor<lite::TargetType::kX86, T> {
 functor(context, output, 0.0);
 auto* in1_data = in1_value.data<T>();
-auto* out_data = output->mutable_data<T>();
+auto* out_data = output->template mutable_data<T>();
 for (size_t i = 0; i < in1_rows.size(); i++) {
 for (int64_t j = 0; j < in1_row_numel; j++) {
@@ -127,7 +127,7 @@ struct SelectedRowsAddTo<lite::TargetType::kX86, T> {
 in2_rows.insert(in2_rows.end(), in1_rows.begin(), in1_rows.end());
 auto* in1_data = in1_value.data<T>();
-auto* in2_data = in2_value->mutable_data<T>();
+auto* in2_data = in2_value->template mutable_data<T>();
 std::copy_n(in1_data, in1_value.numel(), in2_data + input2_offset);
 }
 };
@@ -161,7 +161,7 @@ struct SelectedRowsSumTo<lite::TargetType::kX86, T> {
 input2->set_rows(in2_rows);
 auto* in2_value = input2->mutable_value();
-T* in2_data = in2_value->mutable_data<T>();
+T* in2_data = in2_value->template mutable_data<T>();
 auto blas = math::GetBlas<lite::TargetType::kX86, T>(context);
 size_t offset = 0u;
 for (size_t i = 0u; i != input1.size(); ++i) {
@@ -194,7 +194,7 @@ struct SelectedRowsAddToTensor<lite::TargetType::kX86, T> {
 PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);
 auto* in1_data = in1_value.data<T>();
-auto* input2_data = input2->mutable_data<T>();
+auto* input2_data = input2->template mutable_data<T>();
 for (size_t i = 0; i < in1_rows.size(); i++) {
 for (int64_t j = 0; j < in1_row_numel; j++) {
@@ -305,7 +305,7 @@ struct MergeAdd<lite::TargetType::kX86, T> {
 lite::DDim dims(std::vector<int64_t>(
 {static_cast<int64_t>(merged_row_set.size()), input_width}));
 out.mutable_value()->Resize(dims);
-auto* out_data = out.mutable_value()->mutable_data<T>();
+auto* out_data = out.mutable_value()->template mutable_data<T>();
 if (merged_row_set.size() == row_num && !sorted_result) {
 // no duplicated ids, just concat the result together
@@ -385,7 +385,7 @@ struct UpdateToTensor<lite::TargetType::kX86, T> {
 PADDLE_ENFORCE_EQ(in1_row_numel, input2->numel() / in1_height);
 auto* in1_data = in1_value.data<T>();
-auto* input2_data = input2->data<T>();
+auto* input2_data = input2->template data<T>();
 // FIXME(typhoonzero): use macro fix the below messy code.
 switch (op) {
......
@@ -24,10 +24,10 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
 public:
 void operator()(const lite::Context<lite::TargetType::kX86>& context,
 const lite::Tensor& src,
-const std::vector<size_t>& index_lod,
+const std::vector<uint64_t>& index_lod,
 lite::Tensor* dst,
 bool is_src_index) {
-const size_t* index = index_lod.data();
+const uint64_t* index = index_lod.data();
 const auto& src_dims = src.dims();
 const auto& dst_dims = dst->dims();
 PADDLE_ENFORCE_EQ(
@@ -39,7 +39,7 @@ class CopyMatrixRowsFunctor<lite::TargetType::kX86, T> {
 auto height = dst_dims[0];
 auto width = dst_dims[1];
 auto* src_data = src.data<T>();
-auto* dst_data = dst->mutable_data<T>();
+auto* dst_data = dst->template mutable_data<T>();
 const int sz = width * sizeof(T);
 if (is_src_index) {
 for (int i = 0; i < height; ++i) {
......
@@ -36,7 +36,7 @@ class CopyMatrixRowsFunctor {
 // The indexed rows are based on the input index.
 void operator()(const lite::Context<Target>& context,
 const lite::Tensor& src,
-const std::vector<size_t>& index_lod,
+const std::vector<uint64_t>& index_lod,
 lite::Tensor* dst,
 bool is_src_index);
 };
@@ -130,8 +130,8 @@ class LoDTensor2BatchFunctor {
 // batch_lods[2] is the sort order for the input LoDTensor.
 batch_lods->at(2).resize(seq_info.size());
-size_t* batch_starts = batch_lods->at(0).data();
-size_t* seq2batch_idx = batch_lods->at(1).data();
+auto* batch_starts = batch_lods->at(0).data();
+auto* seq2batch_idx = batch_lods->at(1).data();
 batch_starts[0] = 0;
 for (int n = 0; n < max_seqlen; n++) {
 auto batch_id = static_cast<int>(batch_starts[n]);
@@ -148,7 +148,7 @@ class LoDTensor2BatchFunctor {
 }
 batch_starts[n + 1] = static_cast<size_t>(batch_id);
 }
-size_t* seq_order = batch_lods->at(2).data();
+auto* seq_order = batch_lods->at(2).data();
 for (size_t i = 0; i < seq_info.size(); ++i) {
 seq_order[i] = seq_info[i].seq_idx;
 }
......
@@ -22,15 +22,15 @@ namespace math {
 template <typename T>
 void CopyValidData(lite::Tensor* dst_tensor,
 const lite::Tensor* src_tensor,
-const std::vector<size_t>& seq_offsets,
+const std::vector<uint64_t>& seq_offsets,
 int pad_seq_len,
 int step_width,
 bool norm_by_len,
 CopyType type,
 PadLayout layout) {
 int seq_num = seq_offsets.size() - 1;
-const T* src_data = src_tensor->data<T>();
-T* dst_data = dst_tensor->mutable_data<T>();
+const T* src_data = src_tensor->template data<T>();
+T* dst_data = dst_tensor->template mutable_data<T>();
 int seq_cpy_gap = step_width;
 int pad_cpy_gap =
@@ -113,7 +113,7 @@ class PaddingLoDTensorFunctor<lite::TargetType::kX86, T> {
 "'step_width'.");
 // fill padding value
-T* pad_data = pad_tensor->mutable_data<T>();
+T* pad_data = pad_tensor->template mutable_data<T>();
 const T* pad_value_data = pad_value.data<T>();
 if (pad_value.numel() == 1) {
 fast_mem_init<T>(
......
@@ -30,10 +30,10 @@ enum PadLayout { kBatchLengthWidth = 0, kLengthBatchWidth };
 enum CopyType { kSeqToPad, kPadToSeq };
-inline static size_t MaximumSequenceLength(
-const std::vector<size_t>& seq_offset) {
-size_t seq_num = seq_offset.size() - 1;
-size_t max_seq_len = 0;
+inline static uint64_t MaximumSequenceLength(
+const std::vector<uint64_t>& seq_offset) {
+uint64_t seq_num = seq_offset.size() - 1;
+uint64_t max_seq_len = 0;
 for (size_t i = 0; i < seq_num; ++i) {
 max_seq_len = std::max(max_seq_len, seq_offset[i + 1] - seq_offset[i]);
 }
@@ -42,7 +42,7 @@ inline static size_t MaximumSequenceLength(
 inline static void CheckDims(const lite::DDim& seq_tensor_dims,
 const lite::DDim& pad_tensor_dims,
-const std::vector<size_t>& seq_offset,
+const std::vector<uint64_t>& seq_offset,
 int64_t padded_seq_len,
 int64_t step_width,
 const PadLayout& layout) {
......
@@ -55,7 +55,7 @@ class MaxSeqPoolFunctor {
 auto starts = input.lod()[0];
 const T* in_data = input.data<T>();
-T* out_data = output->mutable_data<T>();
+T* out_data = output->template mutable_data<T>();
 int* max_index = index->mutable_data<int>();
 int64_t num_seq = out_dims[0];
@@ -103,7 +103,7 @@ class MaxSeqPoolFunctor<T, true> {
 auto starts = input.lod()[0];
 const T* in_data = input.data<T>();
-T* out_data = output->mutable_data<T>();
+T* out_data = output->template mutable_data<T>();
 int64_t num_seq = out_dims[0];
 int64_t dim = output->numel() / num_seq;
@@ -145,7 +145,7 @@ class MaxSeqPoolGradFunctor {
 const T* og_data = out_grad.data<T>();
 const int* max_index = index.data<int>();
-T* ig_data = in_grad->mutable_data<T>();
+T* ig_data = in_grad->template mutable_data<T>();
 SetConstant<TARGET(kX86), T> set_zero;
 set_zero(context, in_grad, static_cast<T>(0.0));
@@ -170,7 +170,7 @@ class LastSeqPoolFunctor {
 lite::Tensor* output) {
 // Create pointers to input and output data
 auto* in_data = input.data<T>();
-auto* out_data = output->mutable_data<T>();
+auto* out_data = output->template mutable_data<T>();
 // Calculate the size of each item in sequence
 int64_t item_size = input.numel() / input.dims()[0];
@@ -203,7 +203,7 @@ class FirstSeqPoolFunctor {
 lite::Tensor* output) {
 // Create pointers to input and output data
 auto* in_data = input.data<T>();
-auto* out_data = output->mutable_data<T>();
+auto* out_data = output->template mutable_data<T>();
 // Calculate the size of each item in sequence
 int64_t item_size = input.numel() / input.dims()[0];
@@ -238,7 +238,7 @@ class SumSeqPoolGradFunctor {
 int64_t in_w = in_grad->numel() / in_grad->dims()[0];
 PADDLE_ENFORCE(in_w == out_w);
 const T* out_g_data = out_grad.data<T>();
-T* in_g_data = in_grad->mutable_data<T>(TARGET(kX86));
+T* in_g_data = in_grad->template mutable_data<T>(TARGET(kX86));
 auto blas = math::GetBlas<TARGET(kX86), T>(context);
 for (int i = 0; i < static_cast<int>(lod.size()) - 1; ++i) {
 int64_t h = static_cast<int64_t>(lod[i + 1] - lod[i]);
@@ -288,7 +288,7 @@ class SequencePoolFunctor<TARGET(kX86), T> {
 auto lod = input.lod()[0];
 if (pooltype == "SUM") {
 const T* src = input.data<T>();
-T* dst = output->mutable_data<T>(TARGET(kX86));
+T* dst = output->template mutable_data<T>(TARGET(kX86));
 jit::seq_pool_attr_t attr(
 static_cast<int>(input.numel() / input.dims()[0]),
 jit::SeqPoolType::kSum);
......
@@ -101,13 +101,13 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
 TEST(SequencePoolingGrad, CPU_SUM) {
 paddle::framework::LoD lod1;
-lod1.push_back(std::vector<size_t>{0, 10});
+lod1.push_back(std::vector<uint64_t>{0, 10});
 TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
 paddle::platform::CPUPlace,
 float>(lod1);
 paddle::framework::LoD lod2;
-lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
+lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
 TestSequencePoolingSum<paddle::platform::CPUDeviceContext,
 paddle::platform::CPUPlace,
 float>(lod2);
@@ -116,13 +116,13 @@ TEST(SequencePoolingGrad, CPU_SUM) {
 #ifdef PADDLE_WITH_CUDA
 TEST(SequencePoolingGrad, CUDA_SUM) {
 paddle::framework::LoD lod1;
-lod1.push_back(std::vector<size_t>{0, 10});
+lod1.push_back(std::vector<uint64_t>{0, 10});
 TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
 paddle::platform::CUDAPlace,
 float>(lod1);
 paddle::framework::LoD lod2;
-lod2.push_back(std::vector<size_t>{0, 2, 7, 10});
+lod2.push_back(std::vector<uint64_t>{0, 2, 7, 10});
 TestSequencePoolingSum<paddle::platform::CUDADeviceContext,
 paddle::platform::CUDAPlace,
 float>(lod2);
......
@@ -32,7 +32,7 @@ class ScaleLoDTensorFunctor<lite::TargetType::kX86, T> {
 size_t seq_width = seq->dims()[1];
 lite::LoD abs_offset_lod = lite::fluid::ToAbsOffset(lod);
-T* seq_data = seq->mutable_data<T>(lite::TargetType::kX86);
+T* seq_data = seq->template mutable_data<T>(lite::TargetType::kX86);
 for (size_t i = 0; i < num_seq; ++i) {
 for (size_t j = lod[level][i] * seq_width;
 j < lod[level][i + 1] * seq_width;
......
@@ -83,7 +83,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
 auto pos_data = pos->mutable_data<int>(lite::TargetType::kX86);
 int offset = 0;
-std::vector<size_t> vec_out_lod;
+std::vector<uint64_t> vec_out_lod;
 vec_out_lod.reserve(batch_size + 1);
 for (int i = 0; i <= batch_size; ++i) {
 offset = row_lod[i];
@@ -95,7 +95,7 @@ class SequenceTopkAvgPoolingFunctor<lite::TargetType::kX86, T> {
 out->set_lod(lod_temp);
 auto in_data = in.data<T>();
-auto out_data = out->mutable_data<T>(lite::TargetType::kX86);
+auto out_data = out->template mutable_data<T>(lite::TargetType::kX86);
 T* sum_data = new T[max_k];
 for (int i = 0; i < batch_size; ++i) {
......
@@ -108,8 +108,8 @@ class SoftmaxFunctor<Target, T, is_test, enable_if_CPU<Target>> {
 const int num_remain = num_classes / axis_dim;
 if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
-const T* in_data = X->data<T>();
-auto* out_data = Y->mutable_data<T>();
+const T* in_data = X->template data<T>();
+auto* out_data = Y->template mutable_data<T>();
 for (int bs = 0; bs < batch_size; ++bs) {
 T max_val = *std::max_element(in_data, in_data + num_classes);
 max_val *= static_cast<T>(-1);
@@ -219,9 +219,9 @@ class SoftmaxGradFunctor<Target, T, enable_if_CPU<Target>> {
 const int num_remain = num_classes / axis_dim;
 if (num_remain == 1 && lite::x86::MayIUse(lite::x86::avx)) {
-const T* out_data = y->data<T>();
-const T* out_grad = y_grad->data<T>();
-T* in_grad = x_grad->mutable_data<T>();
+const T* out_data = y->template data<T>();
+const T* out_grad = y_grad->template data<T>();
+T* in_grad = x_grad->template mutable_data<T>();
 for (int bs = 0; bs < batch_size; ++bs) {
 T scalar;
 vec_mul_reduce<T, lite::x86::avx>(
......
@@ -104,12 +104,12 @@ class Tree2ColFunctor<lite::TargetType::kX86, T> {
 patch_size = processing_list.size();
 // T *patch_data =
-// patch->mutable_data<T>({static_cast<int64_t>(patch_size),
+// patch->template mutable_data<T>({static_cast<int64_t>(patch_size),
 // static_cast<int64_t>(patch_elem_size)},
 // cpu_place);
 patch->Resize({static_cast<int64_t>(patch_size),
 static_cast<int64_t>(patch_elem_size)});
-auto *patch_data = patch->mutable_data<T>(lite::TargetType::kX86);
+auto *patch_data = patch->template mutable_data<T>(lite::TargetType::kX86);
 constant(context, patch, 0);
 const T *features = node_features.data<T>();
@@ -166,12 +166,12 @@ class Col2TreeFunctor<lite::TargetType::kX86, T> {
 }
 }
 // T *grad_data =
-// in_grad->mutable_data<T>({static_cast<int64_t>(node_count),
+// in_grad->template mutable_data<T>({static_cast<int64_t>(node_count),
 // static_cast<int64_t>(grad_elem_size)},
 // cpu_place);
 in_grad->Resize({static_cast<int64_t>(node_count),
 static_cast<int64_t>(grad_elem_size)});
-auto *grad_data = in_grad->mutable_data<T>(lite::TargetType::kX86);
+auto *grad_data = in_grad->template mutable_data<T>(lite::TargetType::kX86);
 constant(context, in_grad, 0);
 const T *out_g = out_grad.data<T>();
......
@@ -36,7 +36,7 @@ class Unpool2dMaxFunctor<lite::TargetType::kX86, T> {
 int output_feasize = output_height * output_width;
 const T* input_data = input.data<T>();
 const int* indices_data = indices.data<int>();
-T* output_data = output->mutable_data<T>(lite::TargetType::kX86);
+T* output_data = output->template mutable_data<T>(lite::TargetType::kX86);
 for (int b = 0; b < batch_size; ++b) {
 for (int c = 0; c < output_channels; ++c) {
 for (int i = 0; i < input_feasize; ++i) {
@@ -70,7 +70,8 @@ class Unpool2dMaxGradFunctor<lite::TargetType::kX86, T> {
 int output_feasize = output_height * output_width;
 const int* indices_data = indices.data<int>();
 const T* output_grad_data = output_grad.data<T>();
-T* input_grad_data = input_grad->mutable_data<T>(lite::TargetType::kX86);
+T* input_grad_data =
+input_grad->template mutable_data<T>(lite::TargetType::kX86);
 for (int b = 0; b < batch_size; ++b) {
 for (int c = 0; c < output_channels; ++c) {
......
@@ -75,7 +75,7 @@ class Vol2ColFunctor<lite::TargetType::kX86, T> {
 "mismatching.");
 const T* vol_data = vol.data<T>();
-T* col_data = col->mutable_data<T>();
+T* col_data = col->template mutable_data<T>();
 for (int c = 0; c < channels_col; ++c) {
 int w_offset = c % filter_width;
@@ -159,7 +159,7 @@ class Col2VolFunctor<lite::TargetType::kX86, T> {
 output_width,
 "input_width and output_width are "
 "mismatching.");
-T* vol_data = vol->mutable_data<T>();
+T* vol_data = vol->template mutable_data<T>();
 const T* col_data = col.data<T>();
 for (int c = 0; c < channels_col; ++c) {
......
@@ -19,7 +19,7 @@
 namespace paddle {
 namespace lite {
 namespace fluid {
-using LoD = std::vector<std::vector<size_t>>;
+using LoD = std::vector<std::vector<uint64_t>>;
 static LoD ToAbsOffset(const LoD &in) {
 // the lowest level stores relative offsets
......
@@ -231,8 +231,8 @@ class SoftsignCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 // auto& context = ctx_->As<X86Context>();
 auto& param = *param_.get_mutable<operators::ActivationParam>();
-const T* x_data = param.X->data<T>();
-T* out_data = param.Out->mutable_data<T>();
+const T* x_data = param.X->template data<T>();
+T* out_data = param.Out->template mutable_data<T>();
 size_t x_size = param.X->numel();
 for (size_t i = 0; i < x_size; i++) {
 out_data[i] = x_data[i] / (static_cast<T>(1) + std::abs(x_data[i]));
......
@@ -45,9 +45,9 @@ class AttentionPaddingMaskCompute
 auto src_len = static_cast<int64_t>(bottom1->lod()[0][1]);
 const int att_batch = bottom0->lod()[0].size() - 1;
 const int src_batch = bottom1->lod()[0].size() - 1;
-int* pad_begin = _pad_begin->mutable_data<int>();
+int* pad_begin = _pad_begin->template mutable_data<int>();
 for (int i = 0; i < src_batch; ++i) {
-const auto* src_data = bottom1->data<T>() + src_len * i;
+const auto* src_data = bottom1->template data<T>() + src_len * i;
 int index = src_len - 1;
 for (; index >= 0 && _pad_id == static_cast<int>(src_data[index]);
 --index) {
@@ -56,13 +56,14 @@ class AttentionPaddingMaskCompute
 }
 const auto att_len = static_cast<int64_t>(bottom0->lod()[0][1]);
-auto* top_data = top->mutable_data<T>();
+auto* top_data = top->template mutable_data<T>();
 memcpy(top_data,
-bottom0->data<T>(),
+bottom0->template data<T>(),
 bottom0->dims()[0] * bottom0->dims()[1] * sizeof(T));
 for (int i = 0; i < att_batch; ++i) {
 for (int j = 0; j < att_len; ++j) {
-top_data = top->mutable_data<T>() + src_len * (att_len * i + j);
+top_data =
+top->template mutable_data<T>() + src_len * (att_len * i + j);
 int src_idx = i % src_batch;
 for (int k = pad_begin[src_idx]; k < src_len; ++k) {
 top_data[k] = _mask;
......
@@ -59,26 +59,26 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 const int sample_size = x->dims().production() / N / C;
 // alloc memory
-param.y->mutable_data<T>();
+param.y->template mutable_data<T>();
 if (!param.is_test) {
-param.mean_out->mutable_data<T>();
-param.variance_out->mutable_data<T>();
-param.saved_mean->mutable_data<T>();
-param.saved_variance->mutable_data<T>();
+param.mean_out->template mutable_data<T>();
+param.variance_out->template mutable_data<T>();
+param.saved_mean->template mutable_data<T>();
+param.saved_variance->template mutable_data<T>();
 }
 if (!global_stats) {
 // saved_xx is use just in this batch of data
-EigenVectorArrayMap<T> saved_mean_e(param.saved_mean->mutable_data<T>(),
-C);
+EigenVectorArrayMap<T> saved_mean_e(
+param.saved_mean->template mutable_data<T>(), C);
 EigenVectorArrayMap<T> saved_variance_e(
-param.saved_variance->mutable_data<T>(), C);
+param.saved_variance->template mutable_data<T>(), C);
 saved_mean_e.setZero();
 saved_variance_e.setZero();
-EigenVectorArrayMap<T> running_mean_arr(param.mean_out->mutable_data<T>(),
-C);
+EigenVectorArrayMap<T> running_mean_arr(
+param.mean_out->template mutable_data<T>(), C);
 EigenVectorArrayMap<T> running_var_arr(
-param.variance_out->mutable_data<T>(), C);
+param.variance_out->template mutable_data<T>(), C);
 if ((N * sample_size) == 1) {
 LOG(WARNING) << "Only 1 element in normalization dimension, "
@@ -89,7 +89,8 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 switch (param.data_layout) {
 case DATALAYOUT(kNCHW): {
-ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
+ConstEigenArrayMap<T> x_arr(
+x->template data<T>(), sample_size, N * C);
 for (int nc = 0; nc < N * C; ++nc) {
 saved_mean_e(nc % C) += x_arr.col(nc).sum();
 }
@@ -115,33 +116,37 @@ class BatchNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 // use SavedMean and SavedVariance to do normalize
 Eigen::Array<T, Eigen::Dynamic, 1> inv_std(C);
 if (global_stats) {
-ConstEigenVectorArrayMap<T> var_arr(param.variance->data<T>(), C);
+ConstEigenVectorArrayMap<T> var_arr(param.variance->template data<T>(),
+C);
 inv_std = (var_arr + param.epsilon).sqrt().inverse();
 } else {
 EigenVectorArrayMap<T> saved_inv_std(
-param.saved_variance->mutable_data<T>(), C);
+param.saved_variance->template mutable_data<T>(), C);
 // inverse SavedVariance first, gradient will use it too.
 saved_inv_std = (saved_inv_std + param.epsilon).inverse().sqrt();
 inv_std = saved_inv_std;
 }
 ConstEigenVectorArrayMap<T> mean_arr(
-global_stats ? param.mean->data<T>() : param.saved_mean->data<T>(), C);
+global_stats ? param.mean->template data<T>()
+: param.saved_mean->template data<T>(),
+C);
 // ((x - est_mean) * (inv_var) * scale + bias
 // formula transform ====>
 // (x * inv_var * scale) + (bias - est_mean * inv_var * scale)
-ConstEigenVectorArrayMap<T> scale_arr(param.scale->data<T>(), C);
-ConstEigenVectorArrayMap<T> bias_arr(param.bias->data<T>(), C);
+ConstEigenVectorArrayMap<T> scale_arr(param.scale->template data<T>(), C);
+ConstEigenVectorArrayMap<T> bias_arr(param.bias->template data<T>(), C);
 Eigen::Array<T, Eigen::Dynamic, 1> new_scale = inv_std * scale_arr;
 Eigen::Array<T, Eigen::Dynamic, 1> new_bias =
 bias_arr - mean_arr * inv_std * scale_arr;
 switch (param.data_layout) {
 case DATALAYOUT(kNCHW): {
-EigenArrayMap<T> y_arr(param.y->mutable_data<T>(), sample_size, N * C);
-ConstEigenArrayMap<T> x_arr(x->data<T>(), sample_size, N * C);
+EigenArrayMap<T> y_arr(
+param.y->template mutable_data<T>(), sample_size, N * C);
+ConstEigenArrayMap<T> x_arr(x->template data<T>(), sample_size, N * C);
 for (int nc = 0; nc < N * C; ++nc) {
 y_arr.col(nc) = x_arr.col(nc) * new_scale(nc % C) + new_bias(nc % C);
 }
......
@@ -47,7 +47,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 int64_t axis = static_cast<int64_t>(param.axis);
 auto* axis_tensor = param.axis_tensor;
 if (axis_tensor != nullptr) {
-auto* axis_tensor_data = axis_tensor->data<int>();
+auto* axis_tensor_data = axis_tensor->template data<int>();
 axis = static_cast<int64_t>(axis_tensor_data[0]);
 }
@@ -60,7 +60,7 @@ class ConcatCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 int concat_input_size = count(axis + 1, x_dims.size(), x_dims);
 const int top_concat_axis = out->dims()[axis];
 for (size_t i = 0; i < param.x.size(); ++i) {
-const T* bottom_data = param.x[i]->data<T>();
+const T* bottom_data = param.x[i]->template data<T>();
 const int64_t bottom_concat_axis = param.x[i]->dims()[axis];
 for (int n = 0; n < num_concat; ++n) {
 std::memcpy(
......
@@ -52,7 +52,7 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 auto& context = ctx_->As<X86Context>();
 auto& param = *param_.get_mutable<operators::ConvParam>();
 lite::Tensor filter = *param.filter;
-param.output->mutable_data<T>();
+param.output->template mutable_data<T>();
 const int batch_size = static_cast<int>(param.x->dims()[0]);
 std::vector<int64_t> filter_shape_vec(filter.dims().Vectorize());
@@ -95,9 +95,9 @@ class Conv2dCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 auto blas =
 paddle::lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
 for (int i = 0; i < batch_size; i++) {
-lite::Tensor in_batch = param.x->Slice<T>(i, i + 1);
+lite::Tensor in_batch = param.x->template Slice<T>(i, i + 1);
 in_batch.Resize(input_shape);
-lite::Tensor out_batch = param.output->Slice<T>(i, i + 1);
+lite::Tensor out_batch = param.output->template Slice<T>(i, i + 1);
 out_batch.Resize(output_matrix_shape);
 for (int g = 0; g < param.groups; g++) {
 lite::Tensor in_slice =
......
@@ -38,10 +38,10 @@ class DropoutCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
 using param_t = operators::DropoutParam;
 void Run() override {
 auto& param = *param_.get_mutable<operators::DropoutParam>();
-const auto* x_data = param.x->data<T>();
-auto* out_data = param.output->mutable_data<T>();
+const auto* x_data = param.x->template data<T>();
+auto* out_data = param.output->template mutable_data<T>();
 if (!param.is_test) {
-auto* mask_data = param.mask->mutable_data<T>();
+auto* mask_data = param.mask->template mutable_data<T>();
 std::random_device rnd;
 std::minstd_rand engine;
 int seed = param.fix_seed ? param.seed : rnd();
......
...@@ -248,8 +248,8 @@ class TransformFunctor { ...@@ -248,8 +248,8 @@ class TransformFunctor {
lite::Tensor *z, lite::Tensor *z,
const lite::Context<Target> &ctx, const lite::Context<Target> &ctx,
Functor func) Functor func)
: x_(x->data<T>()), : x_(x->template data<T>()),
y_(y->data<T>()), y_(y->template data<T>()),
z_(z->mutable_data<OutType>()), z_(z->mutable_data<OutType>()),
nx_(x->numel()), nx_(x->numel()),
ctx_(ctx), ctx_(ctx),
...@@ -483,9 +483,10 @@ void FusedElemwiseAndActComputeNoBroadcast(const lite::Context<Target> &ctx, ...@@ -483,9 +483,10 @@ void FusedElemwiseAndActComputeNoBroadcast(const lite::Context<Target> &ctx,
x.data<T>(), x.data<T>(),
y.data<T>(), y.data<T>(),
compound_functor, compound_functor,
out->mutable_data<T>(), out->template mutable_data<T>(),
intermediate_out == nullptr ? nullptr intermediate_out == nullptr
: intermediate_out->mutable_data<T>()}); ? nullptr
: intermediate_out->template mutable_data<T>()});
} }
template <lite::TargetType Target, template <lite::TargetType Target,
...@@ -523,9 +524,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx, ...@@ -523,9 +524,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
compound_functor, compound_functor,
h, h,
w, w,
out->mutable_data<T>(), out->template mutable_data<T>(),
intermediate_out == nullptr ? nullptr intermediate_out == nullptr
: intermediate_out->mutable_data<T>()); ? nullptr
: intermediate_out->template mutable_data<T>());
} else { } else {
FusedElemwiseAndActBroadcast2CPU<T, FusedElemwiseAndActBroadcast2CPU<T,
...@@ -539,9 +541,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx, ...@@ -539,9 +541,10 @@ void FusedElemwiseAndActComputeWithBroadcast(const lite::Context<Target> &ctx,
n, n,
post, post,
compound_functor, compound_functor,
out->mutable_data<T>(), out->template mutable_data<T>(),
intermediate_out == nullptr ? nullptr intermediate_out == nullptr
: intermediate_out->mutable_data<T>()); ? nullptr
: intermediate_out->template mutable_data<T>());
} }
} }
......
...@@ -140,9 +140,9 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -140,9 +140,9 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int M = output->dims().production() / w_dims1; int M = output->dims().production() / w_dims1;
const T* input_data = input->data<T>(); const T* input_data = input->template data<T>();
const T* w_data = w->data<T>(); const T* w_data = w->template data<T>();
T* output_data = output->mutable_data<T>(); T* output_data = output->template mutable_data<T>();
auto& context = ctx_->As<X86Context>(); auto& context = ctx_->As<X86Context>();
FCFunctor<lite::TargetType::kX86, T> fc; FCFunctor<lite::TargetType::kX86, T> fc;
...@@ -153,7 +153,7 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -153,7 +153,7 @@ class FcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
input_data, input_data,
w_data, w_data,
output_data, output_data,
bias ? bias->data<T>() : NULL, bias ? bias->template data<T>() : NULL,
with_relu, with_relu,
padding_weights); padding_weights);
} }
......
...@@ -42,9 +42,9 @@ class FillConstantBatchSizeLikeCompute ...@@ -42,9 +42,9 @@ class FillConstantBatchSizeLikeCompute
int output_dim_idx = param.output_dim_idx; int output_dim_idx = param.output_dim_idx;
odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1; odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->Resize(odims); out->Resize(odims);
// out->mutable_data<T>(); // out->template mutable_data<T>();
} }
out->mutable_data<T>(); out->template mutable_data<T>();
auto value = param.value; auto value = param.value;
paddle::lite::x86::math::SetConstant<lite::TargetType::kX86, T> setter; paddle::lite::x86::math::SetConstant<lite::TargetType::kX86, T> setter;
......
...@@ -50,9 +50,9 @@ void CPUGather(const lite::Tensor* src, ...@@ -50,9 +50,9 @@ void CPUGather(const lite::Tensor* src,
auto src_dims = src->dims(); auto src_dims = src->dims();
const T* p_src = src->data<T>(); const T* p_src = src->template data<T>();
const IndexT* p_index = index->data<IndexT>(); const IndexT* p_index = index->data<IndexT>();
T* p_output = output->mutable_data<T>(); T* p_output = output->template mutable_data<T>();
// slice size // slice size
int slice_size = 1; int slice_size = 1;
...@@ -77,7 +77,7 @@ class GatherCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -77,7 +77,7 @@ class GatherCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto index = param.Index; auto index = param.Index;
auto out = param.Out; auto out = param.Out;
out->mutable_data<T>(); out->template mutable_data<T>();
if (x->dims().production() == 0) return; if (x->dims().production() == 0) return;
/* /*
* Since there's no type defined for lite::Tensor in Paddle-Lite, then * Since there's no type defined for lite::Tensor in Paddle-Lite, then
......
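
A standalone sketch of the CPUGather loop shown above, with plain arrays in place of lite::Tensor (illustrative only): output row i is the source row selected by index[i], copied slice_size elements at a time.

#include <cstring>
#include <vector>

void CpuGatherSketch(const float* src, const int* index, int index_size,
                     int slice_size, float* output) {
  for (int i = 0; i < index_size; ++i) {
    // copy the whole slice picked by index[i] into output row i
    std::memcpy(output + i * slice_size, src + index[i] * slice_size,
                slice_size * sizeof(float));
  }
}

int main() {
  std::vector<float> src = {0, 1, 2, 3, 4, 5};   // 3 rows of width 2
  std::vector<int> index = {2, 0};
  std::vector<float> out(index.size() * 2);
  CpuGatherSketch(src.data(), index.data(), 2, 2, out.data());
  return out[0] == 4.f ? 0 : 1;                  // row 2 of src lands first
}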
...@@ -44,7 +44,7 @@ inline void ReorderInitState(const lite::Context<TARGET(kX86)>& context, ...@@ -44,7 +44,7 @@ inline void ReorderInitState(const lite::Context<TARGET(kX86)>& context,
bool indexed_src) { bool indexed_src) {
lite::x86::math::CopyMatrixRowsFunctor<TARGET(kX86), T> row_shuffle; lite::x86::math::CopyMatrixRowsFunctor<TARGET(kX86), T> row_shuffle;
dst->Resize(src.dims()); dst->Resize(src.dims());
dst->mutable_data<T>(); dst->template mutable_data<T>();
row_shuffle(context, src, index_lod, dst, indexed_src); row_shuffle(context, src, index_lod, dst, indexed_src);
} }
...@@ -65,18 +65,19 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -65,18 +65,19 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto* input = param.input; auto* input = param.input;
auto* h0 = param.h0; auto* h0 = param.h0;
auto* weight = param.weight; auto* weight = param.weight;
const T* weight_data = weight->data<T>(); const T* weight_data = weight->template data<T>();
auto* bias = param.bias; auto* bias = param.bias;
auto* batch_gate = param.batch_gate; auto* batch_gate = param.batch_gate;
auto* batch_reset_hidden_prev = param.batch_reset_hidden_prev; auto* batch_reset_hidden_prev = param.batch_reset_hidden_prev;
auto* batch_hidden = param.batch_hidden; auto* batch_hidden = param.batch_hidden;
T* batch_gate_ptr = batch_gate->mutable_data<T>(); T* batch_gate_ptr = batch_gate->template mutable_data<T>();
T* batch_reset_hidden_prev_ptr = batch_reset_hidden_prev->mutable_data<T>(); T* batch_reset_hidden_prev_ptr =
T* batch_hidden_ptr = batch_hidden->mutable_data<T>(); batch_reset_hidden_prev->template mutable_data<T>();
T* batch_hidden_ptr = batch_hidden->template mutable_data<T>();
auto* hidden = param.hidden; auto* hidden = param.hidden;
hidden->mutable_data<T>(); hidden->template mutable_data<T>();
const auto& hidden_dims = hidden->dims(); const auto& hidden_dims = hidden->dims();
...@@ -99,7 +100,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -99,7 +100,7 @@ class GRUCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// Since the batch computing for GRU reorders the input sequences // Since the batch computing for GRU reorders the input sequences
// according to their length. The initialized cell state also needs // according to their length. The initialized cell state also needs
// to reorder. // to reorder.
const std::vector<size_t>& order(batch_gate->lod()[2]); const std::vector<uint64_t>& order(batch_gate->lod()[2]);
ReorderInitState<T>(context, *h0, order, &ordered_h0, true); ReorderInitState<T>(context, *h0, order, &ordered_h0, true);
gru_value.prev_out_value = ordered_h0.mutable_data<T>(); gru_value.prev_out_value = ordered_h0.mutable_data<T>();
} else { } else {
......
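
Several hunks here and below switch LoD offset vectors from std::vector<size_t> to std::vector<uint64_t>, matching a LoD level defined over fixed-width 64-bit offsets rather than the platform-dependent size_t. A minimal sketch of building such an offset row from per-sequence lengths, assuming that LoD layout (names are illustrative only):

#include <cstdint>
#include <vector>

using LoDRow = std::vector<uint64_t>;   // fixed 64-bit offsets on every platform
using LoD = std::vector<LoDRow>;

// Cumulative offsets from per-sequence lengths, e.g. {3, 2} -> {0, 3, 5}.
LoDRow OffsetsFromLengths(const std::vector<int>& lengths) {
  LoDRow offsets;
  offsets.reserve(lengths.size() + 1);
  uint64_t acc = 0;
  offsets.push_back(acc);
  for (int len : lengths) {
    acc += static_cast<uint64_t>(len);
    offsets.push_back(acc);
  }
  return offsets;
}

int main() {
  LoD lod;
  lod.push_back(OffsetsFromLengths({3, 2}));
  return lod[0].back() == 5 ? 0 : 1;
}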
...@@ -47,9 +47,9 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -47,9 +47,9 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto x_dims = x->dims(); auto x_dims = x->dims();
y->mutable_data<T>(); y->template mutable_data<T>();
Mean->mutable_data<T>(); Mean->template mutable_data<T>();
Var->mutable_data<T>(); Var->template mutable_data<T>();
auto matrix_dim = x_dims.Flatten2D(begin_norm_axis); auto matrix_dim = x_dims.Flatten2D(begin_norm_axis);
int left = static_cast<int>(matrix_dim[0]); int left = static_cast<int>(matrix_dim[0]);
...@@ -73,10 +73,10 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -73,10 +73,10 @@ class LayerNormCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
.At(right); .At(right);
ker(in.mutable_data<T>(), ker(in.mutable_data<T>(),
out.mutable_data<T>(), out.mutable_data<T>(),
Mean->mutable_data<T>(), Mean->template mutable_data<T>(),
Var->mutable_data<T>(), Var->template mutable_data<T>(),
Scale->data<T>(), Scale->template data<T>(),
Bias->data<T>(), Bias->template data<T>(),
static_cast<int>(left), static_cast<int>(left),
epsilon, epsilon,
right); right);
......
...@@ -33,15 +33,15 @@ class LookupTableCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -33,15 +33,15 @@ class LookupTableCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto *ids_t = param.Ids; auto *ids_t = param.Ids;
auto *output_t = param.Out; auto *output_t = param.Out;
int64_t padding_idx = param.padding_idx; int64_t padding_idx = param.padding_idx;
const int64_t *ids = ids_t->data<int64_t>(); const int64_t *ids = ids_t->template data<int64_t>();
int64_t ids_numel = ids_t->dims().production(); int64_t ids_numel = ids_t->dims().production();
auto *table_t = param.W; auto *table_t = param.W;
int64_t row_number = table_t->dims()[0]; int64_t row_number = table_t->dims()[0];
int64_t row_width = table_t->dims()[1]; int64_t row_width = table_t->dims()[1];
const T *table = table_t->data<T>(); const T *table = table_t->template data<T>();
T *output = output_t->mutable_data<T>(); T *output = output_t->template mutable_data<T>();
memset(output, 0, output_t->dims().production() * sizeof(T)); memset(output, 0, output_t->dims().production() * sizeof(T));
for (int64_t i = 0; i < ids_numel; ++i) { for (int64_t i = 0; i < ids_numel; ++i) {
if (padding_idx != -1 && ids[i] == padding_idx) { if (padding_idx != -1 && ids[i] == padding_idx) {
......
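
A standalone sketch of the embedding lookup in the hunk above (plain arrays instead of lite::Tensor, illustrative only): the output is zeroed up front, each id copies one table row of row_width elements, and ids equal to padding_idx are skipped so their rows stay zero.

#include <cstdint>
#include <cstring>
#include <vector>

void LookupSketch(const float* table, int64_t row_width,
                  const int64_t* ids, int64_t ids_numel,
                  int64_t padding_idx, float* output) {
  std::memset(output, 0, ids_numel * row_width * sizeof(float));
  for (int64_t i = 0; i < ids_numel; ++i) {
    if (padding_idx != -1 && ids[i] == padding_idx) continue;  // keep zeros
    std::memcpy(output + i * row_width, table + ids[i] * row_width,
                row_width * sizeof(float));
  }
}

int main() {
  std::vector<float> table = {10, 11, 20, 21};   // 2 rows, width 2
  std::vector<int64_t> ids = {1, 0, 1};
  std::vector<float> out(ids.size() * 2);
  LookupSketch(table.data(), 2, ids.data(), ids.size(), /*padding_idx=*/-1,
               out.data());
  return out[0] == 20.f ? 0 : 1;                 // id 1 selects the second row
}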
...@@ -35,7 +35,7 @@ void MatchMatrixTensorCompute<T>::Run() { ...@@ -35,7 +35,7 @@ void MatchMatrixTensorCompute<T>::Run() {
const auto& offset_l = x->lod()[0]; const auto& offset_l = x->lod()[0];
const auto& offset_r = y->lod()[0]; const auto& offset_r = y->lod()[0];
std::vector<size_t> top_offset; std::vector<uint64_t> top_offset;
int top_size = 0; int top_size = 0;
top_offset.push_back(top_size); top_offset.push_back(top_size);
for (size_t b = 0; b < x->lod()[0].size() - 1; b++) { for (size_t b = 0; b < x->lod()[0].size() - 1; b++) {
...@@ -97,9 +97,9 @@ void MatchMatrixTensorCompute<T>::Run() { ...@@ -97,9 +97,9 @@ void MatchMatrixTensorCompute<T>::Run() {
int batch_size = x->lod()[0].size() - 1; int batch_size = x->lod()[0].size() - 1;
int lod_lv1_size = batch_size * dim_t; int lod_lv1_size = batch_size * dim_t;
int lod_lv2_size = x->lod()[0].back() * dim_t; int lod_lv2_size = x->lod()[0].back() * dim_t;
std::vector<size_t> out_lod0(batch_size + 1, 0); std::vector<uint64_t> out_lod0(batch_size + 1, 0);
std::vector<size_t> out_lod1(lod_lv1_size + 1, 0); std::vector<uint64_t> out_lod1(lod_lv1_size + 1, 0);
std::vector<size_t> out_lod2(lod_lv2_size + 1, 0); std::vector<uint64_t> out_lod2(lod_lv2_size + 1, 0);
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
out_lod0[i + 1] = out_lod0[i] + dim_t; out_lod0[i + 1] = out_lod0[i] + dim_t;
int len_l = offset_l[i + 1] - offset_l[i]; int len_l = offset_l[i + 1] - offset_l[i];
......
...@@ -56,7 +56,7 @@ class MatMulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -56,7 +56,7 @@ class MatMulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto *x = param.X; auto *x = param.X;
auto *y = param.Y; auto *y = param.Y;
auto *out = param.Out; auto *out = param.Out;
out->mutable_data<T>(); out->template mutable_data<T>();
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context); auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
auto mat_dim_a = lite::x86::math::CreateMatrixDescriptor( auto mat_dim_a = lite::x86::math::CreateMatrixDescriptor(
......
...@@ -64,7 +64,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -64,7 +64,7 @@ class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
y_matrix = *y; y_matrix = *y;
} }
z->mutable_data<T>(); z->template mutable_data<T>();
auto z_dim = z->dims(); auto z_dim = z->dims();
if (z_dim.size() != 2) { if (z_dim.size() != 2) {
z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
......
...@@ -49,7 +49,7 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -49,7 +49,7 @@ class ReduceSumCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
bool reduce_all = param.reduce_all; bool reduce_all = param.reduce_all;
auto* input = param.x; auto* input = param.x;
auto* output = param.output; auto* output = param.output;
param.output->mutable_data<T>(); param.output->template mutable_data<T>();
const auto& dims = param.dim; const auto& dims = param.dim;
bool keep_dim = param.keep_dim; bool keep_dim = param.keep_dim;
......
...@@ -41,8 +41,8 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -41,8 +41,8 @@ class ScaleCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override { void Run() override {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
scale_compute(param.x->data<T>(), scale_compute(param.x->template data<T>(),
param.output->mutable_data<T>(), param.output->template mutable_data<T>(),
param.x->dims().production(), param.x->dims().production(),
param.scale, param.scale,
param.bias, param.bias,
......
...@@ -84,7 +84,7 @@ void SearchGrnnCompute<T>::PrepareLayout(const Tensor* input_blob) { ...@@ -84,7 +84,7 @@ void SearchGrnnCompute<T>::PrepareLayout(const Tensor* input_blob) {
int max_width = width_data[idx_sorted_by_width_data[0]]; int max_width = width_data[idx_sorted_by_width_data[0]];
// start of reorganizing the input // start of reorganizing the input
std::vector<size_t> new_offset; std::vector<uint64_t> new_offset;
new_offset.resize(max_width + 1); new_offset.resize(max_width + 1);
new_offset[0] = 0; new_offset[0] = 0;
......
...@@ -50,7 +50,7 @@ class SearchGroupPaddingCompute ...@@ -50,7 +50,7 @@ class SearchGroupPaddingCompute
} }
} }
std::vector<size_t> new_offset; std::vector<uint64_t> new_offset;
new_offset.resize(batch + 1); new_offset.resize(batch + 1);
for (int i = 0; i < batch + 1; ++i) { for (int i = 0; i < batch + 1; ++i) {
new_offset[i] = i * max_seq; new_offset[i] = i * max_seq;
...@@ -67,7 +67,7 @@ class SearchGroupPaddingCompute ...@@ -67,7 +67,7 @@ class SearchGroupPaddingCompute
top1_lod.push_back(offset); top1_lod.push_back(offset);
top1->set_lod(top1_lod); top1->set_lod(top1_lod);
top1->Resize({dim0, 1}); top1->Resize({dim0, 1});
memset(top1->mutable_data<T>(), memset(top1->template mutable_data<T>(),
0, 0,
top1->dims()[0] * top1->dims()[1] * sizeof(T)); top1->dims()[0] * top1->dims()[1] * sizeof(T));
// for padding input id // for padding input id
...@@ -76,9 +76,9 @@ class SearchGroupPaddingCompute ...@@ -76,9 +76,9 @@ class SearchGroupPaddingCompute
top2->set_lod(top2_lod); top2->set_lod(top2_lod);
top2->Resize({batch * max_seq, 1}); top2->Resize({batch * max_seq, 1});
// copy data // copy data
const auto* bottom_data = bottom0->data<T>(); const auto* bottom_data = bottom0->template data<T>();
auto* top_data = top0->mutable_data<T>(); auto* top_data = top0->template mutable_data<T>();
auto* top_padding_input_data = top2->mutable_data<T>(); auto* top_padding_input_data = top2->template mutable_data<T>();
for (int i = 0; i < batch; i++) { for (int i = 0; i < batch; i++) {
const int copy_step = offset[i + 1] - offset[i]; const int copy_step = offset[i + 1] - offset[i];
const int start = i * max_seq; const int start = i * max_seq;
......
...@@ -58,8 +58,10 @@ class SearchSeqFcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -58,8 +58,10 @@ class SearchSeqFcCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
int M = x_dims[0]; int M = x_dims[0];
int N = w_dims[0]; int N = w_dims[0];
for (int i = 0; i < M; i++) { for (int i = 0; i < M; i++) {
blas.AXPY( blas.AXPY(N,
N, static_cast<T>(1), b->data<T>(), out->mutable_data<T>() + i * N); static_cast<T>(1),
b->template data<T>(),
out->template mutable_data<T>() + i * N);
} }
} }
} }
......
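
In the SearchSeqFc hunk above, the per-row AXPY call adds the length-N bias vector to each of the M output rows (y = 1·x + y on each row). A minimal sketch of that broadcast without the Blas wrapper (illustrative only):

#include <vector>

void AddBiasPerRow(float* out, const float* bias, int M, int N) {
  for (int i = 0; i < M; ++i) {
    for (int j = 0; j < N; ++j) {
      out[i * N + j] += bias[j];     // same effect as AXPY(N, 1, bias, out + i*N)
    }
  }
}

int main() {
  std::vector<float> out(2 * 3, 0.f);
  std::vector<float> bias = {1, 2, 3};
  AddBiasPerRow(out.data(), bias.data(), 2, 3);
  return out[5] == 3.f ? 0 : 1;      // last element of row 1 received bias[2]
}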
...@@ -39,9 +39,9 @@ class SequenceArithmeticCompute ...@@ -39,9 +39,9 @@ class SequenceArithmeticCompute
out->Resize(x->dims()); out->Resize(x->dims());
out->set_lod(x->lod()); out->set_lod(x->lod());
auto x_data = x->data<T>(); auto x_data = x->template data<T>();
auto y_data = y->data<T>(); auto y_data = y->template data<T>();
auto out_data = out->mutable_data<T>(); auto out_data = out->template mutable_data<T>();
auto x_seq_offset = x->lod()[0]; auto x_seq_offset = x->lod()[0];
auto y_seq_offset = y->lod()[0]; auto y_seq_offset = y->lod()[0];
int seq_num = x_seq_offset.size() - 1; int seq_num = x_seq_offset.size() - 1;
......
...@@ -25,7 +25,7 @@ namespace x86 { ...@@ -25,7 +25,7 @@ namespace x86 {
template <typename T> template <typename T>
inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs, inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
std::vector<lite::Tensor>* xs_in_order) { std::vector<lite::Tensor>* xs_in_order) {
std::vector<size_t> result; std::vector<uint64_t> result;
result.resize(xs[0]->lod()[0].size()); result.resize(xs[0]->lod()[0].size());
for (size_t i = 1; i < result.size(); ++i) { for (size_t i = 1; i < result.size(); ++i) {
...@@ -75,7 +75,7 @@ class SequenceConcatCompute ...@@ -75,7 +75,7 @@ class SequenceConcatCompute
out_dims[0] = batch_size; out_dims[0] = batch_size;
param.Out->Resize(out_dims); param.Out->Resize(out_dims);
T* dout = param.Out->mutable_data<T>(); T* dout = param.Out->template mutable_data<T>();
std::vector<lite::Tensor> x_in_order; std::vector<lite::Tensor> x_in_order;
param.Out->set_lod(ConcatLoD<T>(param.X, &x_in_order)); param.Out->set_lod(ConcatLoD<T>(param.X, &x_in_order));
......
...@@ -26,7 +26,7 @@ namespace x86 { ...@@ -26,7 +26,7 @@ namespace x86 {
namespace { namespace {
inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs, inline LoD ConcatLoD(const std::vector<lite::Tensor*>& xs,
std::vector<lite::Tensor>* xs_in_order) { std::vector<lite::Tensor>* xs_in_order) {
std::vector<size_t> result; std::vector<uint64_t> result;
result.resize(xs[0]->lod()[0].size()); result.resize(xs[0]->lod()[0].size());
for (size_t i = 1; i < result.size(); ++i) { for (size_t i = 1; i < result.size(); ++i) {
......
...@@ -29,9 +29,10 @@ using Tensor = lite::Tensor; ...@@ -29,9 +29,10 @@ using Tensor = lite::Tensor;
template <typename T> template <typename T>
struct SequenceExpandFunctor { struct SequenceExpandFunctor {
void operator()(const Tensor &x, void operator()(
const std::vector<size_t> &ref_lod, /*expand referenced lod*/ const Tensor &x,
Tensor *out) { const std::vector<uint64_t> &ref_lod, /*expand referenced lod*/
Tensor *out) {
int64_t hight = x.dims()[0]; int64_t hight = x.dims()[0];
int64_t width = x.data_size() / hight; int64_t width = x.data_size() / hight;
...@@ -39,13 +40,13 @@ struct SequenceExpandFunctor { ...@@ -39,13 +40,13 @@ struct SequenceExpandFunctor {
T *out_data = out->mutable_data<T, T>(); T *out_data = out->mutable_data<T, T>();
for (int h_id = 0; h_id < hight; ++h_id) { for (int h_id = 0; h_id < hight; ++h_id) {
size_t span = ref_lod[h_id + 1] - ref_lod[h_id]; uint64_t span = ref_lod[h_id + 1] - ref_lod[h_id];
if (span == 0) continue; if (span == 0) continue;
const T *src = in_data + h_id * width; const T *src = in_data + h_id * width;
for (int64_t w_id = 0; w_id < width; ++w_id) { for (uint64_t w_id = 0; w_id < width; ++w_id) {
T ele = src[w_id]; T ele = src[w_id];
size_t offset = ref_lod[h_id] * width; size_t offset = ref_lod[h_id] * width;
for (size_t k = 0; k < span; ++k) { for (uint64_t k = 0; k < span; ++k) {
out_data[offset + k * width + w_id] = ele; out_data[offset + k * width + w_id] = ele;
} }
} }
...@@ -68,7 +69,7 @@ class SequenceExpandAsCompute ...@@ -68,7 +69,7 @@ class SequenceExpandAsCompute
CHECK_EQ(y_lod.size(), 1); CHECK_EQ(y_lod.size(), 1);
CHECK_GT(y_lod[0].size(), 1); CHECK_GT(y_lod[0].size(), 1);
out->mutable_data<T, T>(); out->template mutable_data<T, T>();
SequenceExpandFunctor<T> seq_espand_functor; SequenceExpandFunctor<T> seq_espand_functor;
seq_espand_functor(*x, y_lod[0], out); seq_espand_functor(*x, y_lod[0], out);
......
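
The SequenceExpandFunctor hunk above repeats each input row h of x a span of ref_lod[h+1] - ref_lod[h] times into the output. A standalone sketch of that expansion with plain vectors (illustrative only):

#include <cstdint>
#include <vector>

std::vector<float> ExpandRows(const std::vector<float>& x, int64_t height,
                              const std::vector<uint64_t>& ref_lod) {
  const int64_t width = static_cast<int64_t>(x.size()) / height;
  std::vector<float> out(ref_lod.back() * width);
  for (int64_t h = 0; h < height; ++h) {
    const uint64_t span = ref_lod[h + 1] - ref_lod[h];  // copies of row h
    const float* src = x.data() + h * width;
    float* dst = out.data() + ref_lod[h] * width;       // first copy of row h
    for (uint64_t k = 0; k < span; ++k) {
      for (int64_t w = 0; w < width; ++w) dst[k * width + w] = src[w];
    }
  }
  return out;
}

int main() {
  // Two rows of width 2; ref_lod {0, 2, 3} repeats row 0 twice and row 1 once.
  std::vector<float> x = {1, 2, 3, 4};
  std::vector<float> out = ExpandRows(x, /*height=*/2, {0, 2, 3});
  return out.size() == 6 && out[2] == 1.f ? 0 : 1;      // {1, 2, 1, 2, 3, 4}
}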
...@@ -40,7 +40,7 @@ class SequencePoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -40,7 +40,7 @@ class SequencePoolCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
dims[0] = lod[0].size() - 1; dims[0] = lod[0].size() - 1;
out->Resize({dims}); out->Resize({dims});
out->mutable_data<T>(); out->template mutable_data<T>();
lite::Tensor* index = nullptr; lite::Tensor* index = nullptr;
const bool is_test = true; const bool is_test = true;
......
...@@ -64,9 +64,9 @@ class SequenceReshapeCompute ...@@ -64,9 +64,9 @@ class SequenceReshapeCompute
out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()), out->Resize(std::vector<int64_t>{static_cast<int64_t>(out->lod()[0].back()),
out_width}); out_width});
auto* dst_ptr = out->mutable_data<T>(); auto* dst_ptr = out->template mutable_data<T>();
auto size = in->numel() * sizeof(T); auto size = in->numel() * sizeof(T);
std::memcpy(dst_ptr, in->data<T>(), size); std::memcpy(dst_ptr, in->template data<T>(), size);
} }
virtual ~SequenceReshapeCompute() = default; virtual ~SequenceReshapeCompute() = default;
......
...@@ -29,7 +29,7 @@ class ShapeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -29,7 +29,7 @@ class ShapeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
void Run() override { void Run() override {
auto& param = *param_.get_mutable<operators::ShapeParam>(); auto& param = *param_.get_mutable<operators::ShapeParam>();
// auto& context = context_->As<X86Context>(); // auto& context = context_->As<X86Context>();
auto out_data = param.Out->mutable_data<int32_t>(); auto out_data = param.Out->template mutable_data<int32_t>();
auto in_dims = param.X->dims(); auto in_dims = param.X->dims();
for (int i = 0; i < in_dims.size(); ++i) { for (int i = 0; i < in_dims.size(); ++i) {
out_data[i] = in_dims[i]; out_data[i] = in_dims[i];
......
...@@ -58,7 +58,7 @@ class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -58,7 +58,7 @@ class SoftmaxCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto* x = param.x; auto* x = param.x;
auto* output = param.output; auto* output = param.output;
output->mutable_data<T>(); output->template mutable_data<T>();
const int rank = x->dims().size(); const int rank = x->dims().size();
const int axis = CanonicalAxis(param.axis, rank); const int axis = CanonicalAxis(param.axis, rank);
......
...@@ -35,8 +35,8 @@ class SqueezeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -35,8 +35,8 @@ class SqueezeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto x = param.X; auto x = param.X;
auto output = param.Out; auto output = param.Out;
auto x_dims = x->dims(); auto x_dims = x->dims();
auto* x_data = x->data<T>(); auto* x_data = x->template data<T>();
auto* out_data = output->mutable_data<T>(); auto* out_data = output->template mutable_data<T>();
memcpy(out_data, x_data, x_dims.production() * sizeof(T)); memcpy(out_data, x_data, x_dims.production() * sizeof(T));
} }
...@@ -54,9 +54,9 @@ class Squeeze2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -54,9 +54,9 @@ class Squeeze2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto output = param.Out; auto output = param.Out;
auto xshape = param.XShape; auto xshape = param.XShape;
auto x_dims = x->dims(); auto x_dims = x->dims();
auto* x_data = x->data<T>(); auto* x_data = x->template data<T>();
auto* out_data = output->mutable_data<T>(); auto* out_data = output->template mutable_data<T>();
auto* xshape_data = xshape->mutable_data<T>(); auto* xshape_data = xshape->template mutable_data<T>();
memcpy(out_data, x_data, x_dims.production() * sizeof(T)); memcpy(out_data, x_data, x_dims.production() * sizeof(T));
memcpy(xshape_data, x_data, x_dims.production() * sizeof(T)); memcpy(xshape_data, x_data, x_dims.production() * sizeof(T));
} }
......
...@@ -40,9 +40,9 @@ class StackCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -40,9 +40,9 @@ class StackCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
if (axis < 0) axis += (x[0]->dims().size() + 1); if (axis < 0) axis += (x[0]->dims().size() + 1);
int n = static_cast<int>(x.size()); int n = static_cast<int>(x.size());
auto y_data = y->mutable_data<T>(); auto y_data = y->template mutable_data<T>();
std::vector<const T*> x_datas(n); std::vector<const T*> x_datas(n);
for (int i = 0; i < n; ++i) x_datas[i] = x[i]->data<T>(); for (int i = 0; i < n; ++i) x_datas[i] = x[i]->template data<T>();
int pre = 1, post = 1; int pre = 1, post = 1;
auto dim = x[0]->dims(); auto dim = x[0]->dims();
......
...@@ -73,7 +73,7 @@ class TransposeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -73,7 +73,7 @@ class TransposeCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
auto* x = param.x; auto* x = param.x;
auto* out = param.output; auto* out = param.output;
out->mutable_data<T>(); out->template mutable_data<T>();
int ndims = param.axis.size(); int ndims = param.axis.size();
auto& context = ctx_->As<X86Context>(); auto& context = ctx_->As<X86Context>();
TransCompute<lite::TargetType::kX86, T>( TransCompute<lite::TargetType::kX86, T>(
...@@ -92,7 +92,7 @@ class Transpose2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -92,7 +92,7 @@ class Transpose2Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
auto& param = *param_.get_mutable<param_t>(); auto& param = *param_.get_mutable<param_t>();
auto* x = param.x; auto* x = param.x;
auto* out = param.output; auto* out = param.output;
out->mutable_data<T>(); out->template mutable_data<T>();
int ndims = param.axis.size(); int ndims = param.axis.size();
auto& context = ctx_->As<X86Context>(); auto& context = ctx_->As<X86Context>();
TransCompute<lite::TargetType::kX86, T>( TransCompute<lite::TargetType::kX86, T>(
......
...@@ -34,8 +34,8 @@ class UniformRandomCompute ...@@ -34,8 +34,8 @@ class UniformRandomCompute
auto *param_out = &param.Out->raw_tensor(); auto *param_out = &param.Out->raw_tensor();
T *data = T *data = param_out->template mutable_data<T>(
param_out->mutable_data<T>(context.x86_device_context()->GetPlace()); context.x86_device_context()->GetPlace());
unsigned int seed = static_cast<unsigned int>(param.seed); unsigned int seed = static_cast<unsigned int>(param.seed);
std::minstd_rand engine; std::minstd_rand engine;
......
...@@ -80,7 +80,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -80,7 +80,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
std::vector<int64_t> col_dims_vec{top_size}; std::vector<int64_t> col_dims_vec{top_size};
col_dims_vec.push_back(1); col_dims_vec.push_back(1);
col->Resize(col_dims_vec); col->Resize(col_dims_vec);
auto* top_data = col->mutable_data<T>(); auto* top_data = col->template mutable_data<T>();
const auto* bottom_data = input.data<T>(); const auto* bottom_data = input.data<T>();
int kernel_win_size = kernel_h * kernel_w; int kernel_win_size = kernel_h * kernel_w;
...@@ -149,7 +149,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -149,7 +149,7 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
// const auto& offset_y = in_row->lod()[0]; // const auto& offset_y = in_row->lod()[0];
const auto& offset_y = param.X->lod()[1]; const auto& offset_y = param.X->lod()[1];
const auto& offset_x = param.X->lod()[2]; const auto& offset_x = param.X->lod()[2];
std::vector<size_t> top_offset; std::vector<uint64_t> top_offset;
int top_size = 0; int top_size = 0;
top_offset.push_back(top_size); top_offset.push_back(top_size);
for (int b = 0; b < batch; ++b) { for (int b = 0; b < batch; ++b) {
...@@ -178,9 +178,9 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> { ...@@ -178,9 +178,9 @@ class VarConv2DCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
std::vector<int64_t> top_dims_vec{top_size}; std::vector<int64_t> top_dims_vec{top_size};
top_dims_vec.push_back(1); top_dims_vec.push_back(1);
top->Resize(top_dims_vec); top->Resize(top_dims_vec);
auto* top_data = top->mutable_data<T>(); auto* top_data = top->template mutable_data<T>();
const auto* w_data = w->data<T>(); const auto* w_data = w->template data<T>();
const auto* col_data = col->data<T>(); const auto* col_data = col->template data<T>();
auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context); auto blas = lite::x86::math::GetBlas<lite::TargetType::kX86, T>(context);
for (int b = 0; b < batch; ++b) { for (int b = 0; b < batch; ++b) {
......
...@@ -140,7 +140,7 @@ static void var_conv_2d_ref(const lite::Tensor* bottom, ...@@ -140,7 +140,7 @@ static void var_conv_2d_ref(const lite::Tensor* bottom,
const auto& col_offset = col->lod()[0]; const auto& col_offset = col->lod()[0];
const auto& offset_x = in_col->lod()[0]; const auto& offset_x = in_col->lod()[0];
const auto& offset_y = in_row->lod()[0]; const auto& offset_y = in_row->lod()[0];
std::vector<size_t> top_offset; std::vector<uint64_t> top_offset;
int top_size = 0; int top_size = 0;
top_offset.push_back(top_size); top_offset.push_back(top_size);
for (int b = 0; b < batch; ++b) { for (int b = 0; b < batch; ++b) {
......