未验证 提交 e426cdae 编写于 作者: Y Yan Chunwei 提交者: GitHub

fix inference output with lod (#13557)

上级 bc1fa4fd
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/timer.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -215,57 +216,20 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
template <typename T>
void NativePaddlePredictor::GetFetchOne(const framework::LoDTensor &fetch,
PaddleTensor *output) {
std::vector<int> shape;
auto dims_i = fetch.dims();
auto lod = fetch.lod();
const T *output_ptr = fetch.data<T>();
auto num = fetch.numel();
std::vector<T> data;
if (0 == lod.size()) {
std::copy(output_ptr, output_ptr + num, std::back_inserter(data));
for (int j = 0; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
} else {
// for batch detection
// image[0] -> output[0] shape {145, 6}
// image[1] -> output[1] shape {176, 6}
// then,
// the batch output shape {321, 6}
// the lod {{0, 145, 321}}
// so we should append output[0] to {176, 6}
size_t max_dim = 0;
for (size_t j = 1; j < lod[0].size(); j++) {
max_dim = std::max(max_dim, lod[0][j] - lod[0][j - 1]);
}
size_t common_dim = lod[0].back() == 0 ? 0 : num / lod[0].back();
if (max_dim > 0) {
data.resize((lod[0].size() - 1) * max_dim * common_dim, 0);
}
for (size_t j = 1; j < lod[0].size(); j++) {
size_t start = lod[0][j - 1] * common_dim;
size_t end = lod[0][j] * common_dim;
if (end > start) {
std::copy(output_ptr + start, output_ptr + end,
data.begin() + (j - 1) * max_dim * common_dim);
}
}
shape.push_back(lod[0].size() - 1);
shape.push_back(max_dim);
for (int j = 1; j < dims_i.size(); ++j) {
shape.push_back(dims_i[j]);
}
}
output->shape = shape;
auto &buffer = output->data;
if (buffer.empty() || buffer.length() < sizeof(T) * data.size()) {
buffer.Resize(sizeof(T) * data.size());
}
std::memcpy(buffer.data(), data.data(), sizeof(T) * data.size());
// copy LoD
for (const auto &level : fetch.lod()) {
output->lod.emplace_back(level);
// set shape.
auto shape = framework::vectorize(fetch.dims());
output->shape.assign(shape.begin(), shape.end());
// set data.
const T *data = fetch.data<T>();
int num_elems = inference::VecReduceToInt(shape);
output->data.Resize(num_elems * sizeof(T));
// The fetched tensor output by fetch op, should always in CPU memory, so just
// copy.
memcpy(output->data.data(), data, num_elems * sizeof(T));
// set lod
output->lod.clear();
for (auto &level : fetch.lod()) {
output->lod.emplace_back(level.begin(), level.end());
}
}
......
......@@ -74,13 +74,17 @@ template <>
std::string to_string<std::vector<std::vector<float>>>(
const std::vector<std::vector<std::vector<float>>> &vec);
template <typename T>
int VecReduceToInt(const std::vector<T> &v) {
return std::accumulate(v.begin(), v.end(), 1, [](T a, T b) { return a * b; });
}
template <typename T>
static void TensorAssignData(PaddleTensor *tensor,
const std::vector<std::vector<T>> &data) {
// Assign buffer
int dim = std::accumulate(tensor->shape.begin(), tensor->shape.end(), 1,
[](int a, int b) { return a * b; });
tensor->data.Resize(sizeof(T) * dim);
int num_elems = VecReduceToInt(tensor->shape);
tensor->data.Resize(sizeof(T) * num_elems);
int c = 0;
for (const auto &f : data) {
for (T v : f) {
......@@ -89,7 +93,7 @@ static void TensorAssignData(PaddleTensor *tensor,
}
}
std::string DescribeTensor(const PaddleTensor &tensor) {
static std::string DescribeTensor(const PaddleTensor &tensor) {
std::stringstream os;
os << "Tensor [" << tensor.name << "]\n";
os << " - type: ";
......@@ -113,8 +117,7 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
os << "\n";
os << " - data: ";
int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
[](int a, int b) { return a * b; });
int dim = VecReduceToInt(tensor.shape);
for (int i = 0; i < dim; i++) {
os << static_cast<float *>(tensor.data.data())[i] << " ";
}
......@@ -122,8 +125,8 @@ std::string DescribeTensor(const PaddleTensor &tensor) {
return os.str();
}
void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double latency, int epoch = 1) {
static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double latency, int epoch = 1) {
LOG(INFO) << "====== batch_size: " << batch_size << ", repeat: " << repeat
<< ", threads: " << num_threads << ", thread id: " << tid
<< ", latency: " << latency << "ms ======";
......
......@@ -47,11 +47,8 @@ void CompareResult(const std::vector<PaddleTensor> &outputs,
for (size_t i = 0; i < outputs.size(); i++) {
auto &out = outputs[i];
auto &ref_out = ref_outputs[i];
size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
[](int a, int b) { return a * b; });
size_t ref_size =
std::accumulate(ref_out.shape.begin(), ref_out.shape.end(), 1,
[](int a, int b) { return a * b; });
size_t size = VecReduceToInt(out.shape);
size_t ref_size = VecReduceToInt(ref_out.shape);
EXPECT_GT(size, 0);
EXPECT_EQ(size, ref_size);
EXPECT_EQ(out.dtype, ref_out.dtype);
......@@ -87,10 +84,7 @@ std::unique_ptr<PaddlePredictor> CreateTestPredictor(
}
}
size_t GetSize(const PaddleTensor &out) {
return std::accumulate(out.shape.begin(), out.shape.end(), 1,
[](int a, int b) { return a * b; });
}
size_t GetSize(const PaddleTensor &out) { return VecReduceToInt(out.shape); }
std::unordered_map<std::string, int> GetFuseStatis(AnalysisConfig config,
int *num_ops) {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册