提交 1df40eed 编写于 作者: E eclipsess

Im2Sequence: allocate the output via mutable_data with explicit dims and set its LoD

上级 abc1eaf3
...@@ -35,7 +35,7 @@ template <> ...@@ -35,7 +35,7 @@ template <>
void Im2SequenceKernel<CPU, float>::Compute( void Im2SequenceKernel<CPU, float>::Compute(
const Im2SequenceParam<CPU> &param) const { const Im2SequenceParam<CPU> &param) const {
const Tensor *in_x = param.Input(); const Tensor *in_x = param.Input();
Tensor *out = param.Output(); framework::LoDTensor *out = param.Output();
out->mutable_data<float>(); out->mutable_data<float>();
std::vector<int> kernels = param.Kernels(); std::vector<int> kernels = param.Kernels();
...@@ -52,22 +52,31 @@ void Im2SequenceKernel<CPU, float>::Compute( ...@@ -52,22 +52,31 @@ void Im2SequenceKernel<CPU, float>::Compute(
paddings[2], strides[0]); paddings[2], strides[0]);
int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1], int output_width = Im2SeqOutputSize(img_width, kernels[1], paddings[1],
paddings[3], strides[1]); paddings[3], strides[1]);
const std::vector<int> dilations({1, 1});
out->mutable_data<float>({batch_size * output_height * output_width,
img_channels * kernels[0] * kernels[1]});
const std::vector<int> dilations({1, 1});
// TODO: verify // TODO: verify
auto out_dims = out->dims(); auto out_dims = out->dims();
out->Resize({batch_size, out->numel() / batch_size}); out->Resize({batch_size, out->numel() / batch_size});
for (int i = 0; i < batch_size; i++) { for (int i = 0; i < batch_size; i++) {
const Tensor src = const Tensor src =
in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width}); in_x->Slice(i, i + 1).Resize({img_channels, img_height, img_width});
Tensor dst = out->Slice(i, i + 1).Resize( Tensor dst = out->Slice(i, i + 1).Resize(
{output_height, output_width, img_channels, kernels[0], kernels[1]}); {output_height, output_width, img_channels, kernels[0], kernels[1]});
math::Im2ColFunctor<math::ColFormat::kOCF, CPU, float> f; math::Im2ColFunctor<math::ColFormat::kOCF, CPU, float> f;
f(src, dilations, strides, paddings, &dst); f(src, dilations, strides, paddings, &dst);
} }
out->Resize(out_dims); out->Resize(out_dims);
framework::LoD lod(1);
lod[0].reserve(batch_size + 1);
int offset = 0;
lod[0].push_back(offset);
for (int i = 0; i < batch_size; ++i) {
offset += output_height * output_width;
lod[0].push_back(offset);
}
out->set_lod(lod);
} }
template class Im2SequenceKernel<CPU, float>; template class Im2SequenceKernel<CPU, float>;
......
...@@ -27,9 +27,9 @@ int main() { ...@@ -27,9 +27,9 @@ int main() {
true)) { true)) {
auto time2 = time(); auto time2 = time();
std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl; std::cout << "load cost :" << time_diff(time1, time1) << "ms" << std::endl;
std::vector<int64_t> dims{1, 1, 48, 48}; std::vector<int64_t> dims{1, 1, 48, 512};
LoDTensor input_tensor; LoDTensor input_tensor;
SetupTensor<float>(&input_tensor, {1, 1, 48, 48}, static_cast<float>(0), SetupTensor<float>(&input_tensor, {1, 1, 48, 512}, static_cast<float>(0),
static_cast<float>(1)); static_cast<float>(1));
std::vector<float> input(input_tensor.data<float>(), std::vector<float> input(input_tensor.data<float>(),
...@@ -39,9 +39,9 @@ int main() { ...@@ -39,9 +39,9 @@ int main() {
paddle_mobile.PredictLod(input_tensor); paddle_mobile.PredictLod(input_tensor);
} }
auto time3 = time(); auto time3 = time();
// for (int i = 0; i < 10; ++i) { for (int i = 0; i < 1; ++i) {
// paddle_mobile.Predict(input, dims); paddle_mobile.PredictLod(input_tensor);
// } }
auto time4 = time(); auto time4 = time();
std::cout << "predict cost :" << time_diff(time3, time4) << "ms" std::cout << "predict cost :" << time_diff(time3, time4) << "ms"
<< std::endl; << std::endl;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册