diff --git a/paddle/fluid/inference/api/api_impl.cc b/paddle/fluid/inference/api/api_impl.cc
index c57fc64bb6bfeebc7935f19d0e977e8fccd4c9a0..dca4386b21b4a064c21b52218682321258f368c4 100644
--- a/paddle/fluid/inference/api/api_impl.cc
+++ b/paddle/fluid/inference/api/api_impl.cc
@@ -22,6 +22,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/feed_fetch_method.h"
 #include "paddle/fluid/inference/api/api_impl.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/timer.h"
 #include "paddle/fluid/platform/profiler.h"
 
@@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
 template <typename T>
 void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
                                         PaddleTensor *output) {
-  std::vector<int> shape;
-  auto dims_i = fetch.dims();
-  auto lod = fetch.lod();
-  const T *output_ptr = fetch.data<T>();
-  auto num = fetch.numel();
-  std::vector<T> data;
-  if (0 == lod.size()) {
-    std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
-    for (int j = 0; j < dims_i.size(); ++j) {
-      shape.push_back(dims_i[j]);
-    }
-  } else {
-    // for batch detection
-    // image[0] -> output[0] shape {145, 6}
-    // image[1] -> output[1] shape {176, 6}
-    // then,
-    // the batch output shape {321, 6}
-    // the lod {{0, 145, 321}}
-    // so we should append output[0] to {176, 6}
-    size_t max_dim = 0;
-    for (size_t j = 1; j < lod[0].size(); j++) {
-      max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
-    }
-    size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
-    if (max_dim > 0) {
-      data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
-    }
-    for (size_t j = 1; j < lod[0].size(); j++) {
-      size_t start = lod[0][j - 1] * common_dim;
-      size_t end = lod[0][j] * common_dim;
-      if (end > start) {
-        std::copy(output_ptr + start, output_ptr + end,
-                  data.begin() + (j - 1) * max_dim * common_dim);
-      }
-    }
-    shape.push_back(lod[0].size() - 1);
-    shape.push_back(max_dim);
-    for (int j = 1; j < dims_i.size(); ++j) {
-      shape.push_back(dims_i[j]);
-    }
-  }
-
-  output->shape = shape;
-  auto &buffer = output->data;
-  if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) {
-    buffer.Resize(sizeof(T) * data.size());
-  }
-  std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size());
-  // copy LoD
-  for (const auto &level : fetch.lod()) {
-    output->lod.emplace_back(level);
+  // set shape.
+  auto shape = framework::vectorize(fetch.dims());
+  output->shape.assign(shape.begin(), shape.end());
+  // set data.
+  const T *data = fetch.data<T>();
+  int num_elems = inference::VecReduceToInt(shape);
+  output->data.Resize(num_elems * sizeof(T));
+  // The fetched tensor output by fetch op, should always in CPU memory, so just
+  // copy.
+  memcpy(output->data.data(), data, num_elems * sizeof(T));
+  // set lod
+  output->lod.clear();
+  for (auto &level : fetch.lod()) {
+    output->lod.emplace_back(level.begin(), level.end());
   }
 }
 
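Note: after this change GetFetchOne reduces to "flatten the dims, do one contiguous memcpy, copy the lod". A minimal standalone sketch of that flow follows; FakeTensor, FakeOutput and GetFetchOneSketch are hypothetical stand-ins for framework::LoDTensor, PaddleTensor and the real method, used only so the snippet compiles on its own.

// Standalone sketch of the new GetFetchOne flow: flatten the dims into the
// output shape, copy the CPU-resident buffer with a single memcpy, then copy
// every lod level. FakeTensor/FakeOutput are illustrative stand-ins only.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <numeric>
#include <vector>

struct FakeTensor {                           // stand-in for framework::LoDTensor
  std::vector<int64_t> dims;                  // flattened shape
  std::vector<float> data;                    // CPU payload
  std::vector<std::vector<std::size_t>> lod;  // level-of-detail offsets
};

struct FakeOutput {                           // stand-in for PaddleTensor
  std::vector<int> shape;
  std::vector<char> data;                     // raw byte buffer, like PaddleBuf
  std::vector<std::vector<std::size_t>> lod;
};

void GetFetchOneSketch(const FakeTensor &fetch, FakeOutput *output) {
  // set shape: the flattened dims as-is, no lod-based re-padding.
  output->shape.assign(fetch.dims.begin(), fetch.dims.end());
  // set data: one contiguous copy, since fetched tensors live in CPU memory.
  int num_elems = std::accumulate(output->shape.begin(), output->shape.end(), 1,
                                  [](int a, int b) { return a * b; });
  output->data.resize(num_elems * sizeof(float));
  std::memcpy(output->data.data(), fetch.data.data(),
              num_elems * sizeof(float));
  // set lod: copy each level verbatim.
  output->lod.assign(fetch.lod.begin(), fetch.lod.end());
}

int main() {
  FakeTensor fetch;
  fetch.dims = {2, 3};
  fetch.data = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  FakeOutput out;
  GetFetchOneSketch(fetch, &out);  // out.shape == {2, 3}, out.data == 24 bytes
  return 0;
}
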
diff --git a/paddle/fluid/inference/api/helper.h b/paddle/fluid/inference/api/helper.h
index 8e359a67738c0df180933421b45f15b39fd0e78c..1fec2f96da0f9d978a3537b2d78e4ce5ef628c81 100644
--- a/paddle/fluid/inference/api/helper.h
+++ b/paddle/fluid/inference/api/helper.h
@@ -74,13 +74,17 @@ template <>
 std::string to_string<std::vector<std::vector<float>>>(
     const std::vector<std::vector<std::vector<float>>> &vec);
 
+template <typename T>
+int VecReduceToInt(const std::vector<T> &v) {
+  return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; });
+}
+
 template <typename T>
 static void TensorAssignData(PaddleTensor *tensor,
                              const std::vector<std::vector<T>> &data) {
   // Assign buffer
-  int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1,
-                            [](int a, int b) { return a * b; });
-  tensor->data.Resize(sizeof(T) * dim);
+  int num_elems = VecReduceToInt(tensor->shape);
+  tensor->data.Resize(sizeof(T) * num_elems);
   int c = 0;
   for (const auto &f : data) {
     for (T v : f) {
@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
   }
 }
 
-std::string DescribeTensor(const PaddleTensor &tensor) {
+static std::string DescribeTensor(const PaddleTensor &tensor) {
   std::stringstream os;
   os << "Tensor [" << tensor.name << "]\n";
   os << " - type: ";
@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
   os << "\n";
   os << " - data: ";
 
-  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                            [](int a, int b) { return a * b; });
+  int dim = VecReduceToInt(tensor.shape);
   for (int i = 0; i < dim; i++) {
     os << static_cast<float *>(tensor.data.data())[i] << " ";
   }
@@ -122,8 +125,8 @@
   return os.str();
 }
 
-void PrintTime(int batch_size, int repeat, int num_threads, int tid,
-               double latency, int epoch = 1) {
+static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
+                      double latency, int epoch = 1) {
   LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
             << ", threads: " << num_threads << ", thread id: " << tid
             << ", latency: " << latency << "ms ======";
diff --git a/paddle/fluid/inference/tests/api/tester_helper.h b/paddle/fluid/inference/tests/api/tester_helper.h
index 7189df775227680726a9d4840386280c5ad44c23..9fcb5129d268a7730c11e5910077ad233050484e 100644
--- a/paddle/fluid/inference/tests/api/tester_helper.h
+++ b/paddle/fluid/inference/tests/api/tester_helper.h
@@ -47,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
   for (size_t i = 0; i < outputs.size(); i++) {
     auto &out = outputs[i];
     auto &ref_out = ref_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t ref_size =
-        std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
-                        [](int a, int b) { return a * b; });
+    size_t size = VecReduceToInt(out.shape);
+    size_t ref_size = VecReduceToInt(ref_out.shape);
     EXPECT_GT(size, 0);
     EXPECT_EQ(size, ref_size);
     EXPECT_EQ(out.dtype, ref_out.dtype);
@@ -87,10 +84,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
   }
 }
 
-size_t GetSize(const PaddleTensor &out) {
-  return std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                         [](int a, int b) { return a * b; });
-}
+size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
 
 std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
                                                    int *num_ops) {
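For reference, the new VecReduceToInt helper just folds the extents of a shape vector into a total element count (the same std::accumulate it replaces at every call site). A small usage sketch follows; the helper is duplicated so the snippet builds without helper.h, and the shape values are made up for illustration.

// Usage sketch of the VecReduceToInt pattern introduced in helper.h above:
// fold a shape vector into an element count, then size a byte buffer from it.
#include <cassert>
#include <numeric>
#include <vector>

template <typename T>
int VecReduceToInt(const std::vector<T> &v) {
  return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; });
}

int main() {
  std::vector<int> shape = {2, 3, 4};     // e.g. a PaddleTensor::shape
  int num_elems = VecReduceToInt(shape);  // 2 * 3 * 4 = 24
  assert(num_elems == 24);

  // Equivalent of output->data.Resize(num_elems * sizeof(T)) in GetFetchOne.
  std::vector<char> buffer(num_elems * sizeof(float));
  assert(buffer.size() == num_elems * sizeof(float));
  return 0;
}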