Clear non-persistable tensor arrays before predicting, fix crash when predicting...

Clear non-persistable tensor arrays before predicting, and fix a crash when predicting in GPU debugging mode (#1548) * Clear non-persistable tensor arrays before predicting, and fix a crash when predicting in GPU debugging mode * Fix code style

Clear non-persistable tensor arrays before predicting, fix crash when predicting...
Clear non-persistable tensor arrays before predicting, and fix a crash when predicting in GPU debugging mode (#1548) * Clear non-persistable tensor arrays before predicting, and fix a crash when predicting in GPU debugging mode * Fix code style
04c139b9 · Houjiang Chen · GitHub · 71c8718b · 04c139b9 · 04c139b9
隐藏空白更改
内联并排

Showing 3 changed files with 40 additions and 26 deletions

src/common/log.h src/common/log.h +12 -4

src/framework/executor.cpp src/framework/executor.cpp +18 -0

src/framework/operator.cpp src/framework/operator.cpp +10 -22

未找到文件。
--- a/src/common/log.h
+++ b/src/common/log.h
@@ -36,16 +36,16 @@ static const char *ANDROID_LOG_TAG =
 #define ANDROIDLOGI(...)                                               \
  __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, __VA_ARGS__); \
-  printf(__VA_ARGS__)
+  printf("%s\n", __VA_ARGS__);
 #define ANDROIDLOGW(...)                                                  \
  __android_log_print(ANDROID_LOG_WARNING, ANDROID_LOG_TAG, __VA_ARGS__); \
-  printf(__VA_ARGS__)
+  printf("%s\n", __VA_ARGS__);
 #define ANDROIDLOGD(...)                                                \
  __android_log_print(ANDROID_LOG_DEBUG, ANDROID_LOG_TAG, __VA_ARGS__); \
-  printf(__VA_ARGS__)
+  printf("%s\n", __VA_ARGS__)
 #define ANDROIDLOGE(...)                                                \
  __android_log_print(ANDROID_LOG_ERROR, ANDROID_LOG_TAG, __VA_ARGS__); \
-  printf(__VA_ARGS__)
+  printf("%s\n", __VA_ARGS__)
 #else
 #define ANDROIDLOGI(...)
 #define ANDROIDLOGW(...)
@@ -88,9 +88,17 @@ struct Print {
  void print(LogLevel level) {
    // buffer_ << std::endl;
    if (level == kLOG_ERROR) {
+#ifdef ANDROID
+      ANDROIDLOGE(buffer_.str().c_str());
+#else
      std::cerr << buffer_.str() << std::endl;
+#endif
    } else {
+#ifdef ANDROID
+      ANDROIDLOGI(buffer_.str().c_str());
+#else
      std::cout << buffer_.str() << std::endl;
+#endif
    }
  }
  std::ostringstream buffer_;

--- a/src/framework/executor.cpp
+++ b/src/framework/executor.cpp
@@ -228,6 +228,20 @@ void Executor<Device, T>::InitMemory() {
  }
 }
+static void ClearNoPersistableTensorArray(const framework::ProgramDesc *program,
+                                          framework::Scope *scope) {
+  for (const auto &block : program->Blocks()) {
+    for (const auto &var_desc : block->Vars()) {
+      if (!var_desc->Persistable() &&
+          var_desc->Type() == VARTYPE_TYPE_STEP_LOD_TENSOR_ARRAY) {
+        auto var = scope->Var(var_desc->Name());
+        auto array = var->template GetMutable<framework::LoDTensorArray>();
+        array->resize(1);
+      }
+    }
+  }
+}
 template <typename Device, typename T>
 void Executor<Device, T>::InitCombineMemory() {
  char *origin_data = nullptr;
@@ -421,6 +435,10 @@ PMStatus Executor<Device, T>::Predict() {
 #if _OPENMP
  omp_set_num_threads(get_global_num_threads());
 #endif
+  // clear all non-persistable tensor arrays, since write_to_array
+  // always pushes back a new tensor into the array
+  ClearNoPersistableTensorArray(program_desc_.get(), program_.scope.get());
 #ifdef PADDLE_MOBILE_PROFILE
  std::vector<ProfInfo> profile(ops_of_block0_.size());
  struct timespec ts;

--- a/src/framework/operator.cpp
+++ b/src/framework/operator.cpp
@@ -102,16 +102,11 @@ void OperatorBase<GPU_CL>::Run() {
  for (const auto key : input_keys) {
    auto var_vec_in = inputs_.at(key);
    for (int i = 0; i < var_vec_in.size(); ++i) {
-      auto vari = scope_->FindVar(var_vec_in[i]);
+      auto var = scope_->FindVar(var_vec_in[i]);
-      if (vari->IsInitialized()) {
+      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        if (type_ == "feed") {
+        const CLImage *cl_image = var->template Get<framework::CLImage>();
-          const Tensor *tensor = vari->template Get<framework::LoDTensor>();
+        if (cl_image) {
-          if (tensor) DLOG << type_ << " input- " << key << "=" << *tensor;
+          DLOG << type_ << " input- " << key << "=" << *cl_image;
-        } else {
-          const CLImage *cl_image = vari->template Get<framework::CLImage>();
-          if (cl_image) {
-            DLOG << type_ << " input- " << key << "=" << *cl_image;
-          }
        }
      }
    }
@@ -119,18 +114,11 @@ void OperatorBase<GPU_CL>::Run() {
  for (const auto key : GetOutKeys()) {
    auto var_vec_out = outputs_.at(key);
    for (int i = 0; i < var_vec_out.size(); ++i) {
-      auto vari = scope_->FindVar(var_vec_out[i]);
+      auto var = scope_->FindVar(var_vec_out[i]);
-      if (vari->IsInitialized()) {
+      if (var->IsInitialized() && var->template IsType<framework::CLImage>()) {
-        if (type_ == "fetch") {
+        const CLImage *cl_image = var->template Get<framework::CLImage>();
-          const Tensor *tensor = vari->template Get<framework::LoDTensor>();
+        if (cl_image) {
-          if (tensor) {
+          DLOG << type_ << " output- " << key << "=" << *cl_image;
-            DLOG << type_ << " output- " << key << "=" << *tensor;
-          }
-        } else {
-          const CLImage *cl_image = vari->template Get<framework::CLImage>();
-          if (cl_image) {
-            DLOG << type_ << " output- " << key << "=" << *cl_image;
-          }
        }
      }
    }