Merge branch 'develop' into develop

74996f7c · Zhang Jun · GitHub · 4f98717c · 2ef61b63 · 74996f7c
7 changed files
--- a/README.md
+++ b/README.md
@@ -259,7 +259,7 @@ output
 ### Developers
 - [How to deploy Paddle Serving on K8S?(Chinese)](doc/PADDLE_SERVING_ON_KUBERNETES.md)
- [How to route Paddle Serving to secure endpoint?(Chinese)](doc/SERVIING_AUTH_DOCKER.md)
+- [How to route Paddle Serving to secure endpoint?(Chinese)](doc/SERVING_AUTH_DOCKER.md)
 - [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md)
 - [Compile from source code](doc/COMPILE.md)
 - [Develop Pipeline Serving](doc/PIPELINE_SERVING.md)

--- a/README_CN.md
+++ b/README_CN.md
@@ -262,7 +262,7 @@ python3 pipeline_rpc_client.py
 - [如何编译PaddleServing?](doc/COMPILE_CN.md)
 - [如何开发Pipeline?](doc/PIPELINE_SERVING_CN.md)
 - [如何在K8S集群上部署Paddle Serving?](doc/PADDLE_SERVING_ON_KUBERNETES.md)
- [如何在Paddle Serving上部署安全网关?](doc/SERVIING_AUTH_DOCKER.md)
+- [如何在Paddle Serving上部署安全网关?](doc/SERVING_AUTH_DOCKER.md)
 - [如何开发Pipeline?](doc/PIPELINE_SERVING_CN.md)
 - [如何使用uWSGI部署Web Service](doc/UWSGI_DEPLOY_CN.md)
 - [如何实现模型文件热加载](doc/HOT_LOADING_IN_SERVING_CN.md)

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -18,7 +18,6 @@
 #include "core/sdk-cpp/include/common.h"
 #include "core/sdk-cpp/include/predictor_sdk.h"
 #include "core/util/include/timer.h"
 DEFINE_bool(profile_client, false, "");
 DEFINE_bool(profile_server, false, "");
@@ -46,7 +45,7 @@ void PredictorClient::init_gflags(std::vector<std::string> argv) {
    int argc = argv.size();
    char **arr = new char *[argv.size()];
    std::string line;
-    for (size_t i = 0; i < argv.size(); i++) {
+    for (size_t i = 0; i < argv.size(); ++i) {
      arr[i] = &argv[i][0];
      line += argv[i];
      line += ' ';
@@ -189,7 +188,6 @@ int PredictorClient::numpy_predict(
  }
  int vec_idx = 0;
  for (int bi = 0; bi < batch_size; bi++) {
    VLOG(2) << "prepare batch " << bi;
    std::vector<Tensor *> tensor_vec;
@@ -220,11 +218,10 @@ int PredictorClient::numpy_predict(
        return -1;
      }
      int nbytes = float_feed[vec_idx].nbytes();
-      // int ndims = float_feed[vec_idx].ndim();
+      void *rawdata_ptr = (void *)(float_feed[vec_idx].data(0));
-      void *rawdata_ptr = (void *)float_feed[vec_idx].data(0);
      int total_number = float_feed[vec_idx].size();
-      // float* end_ptr = (rawdata_ptr + total_number);
      Tensor *tensor = tensor_vec[idx];
      VLOG(2) << "prepare float feed " << name << " shape size "
              << float_shape[vec_idx].size();
      for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
@@ -234,6 +231,7 @@ int PredictorClient::numpy_predict(
        tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
      }
      tensor->set_elem_type(P_FLOAT32);
      tensor->mutable_float_data()->Resize(total_number, 0);
      memcpy(tensor->mutable_float_data()->mutable_data(), rawdata_ptr, nbytes);
      vec_idx++;
@@ -251,7 +249,7 @@ int PredictorClient::numpy_predict(
      }
      Tensor *tensor = tensor_vec[idx];
      int nbytes = int_feed[vec_idx].nbytes();
-      void *rawdata_ptr = (void *)int_feed[vec_idx].data(0);
+      void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
      int total_number = int_feed[vec_idx].size();
      for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
@@ -263,19 +261,14 @@ int PredictorClient::numpy_predict(
      tensor->set_elem_type(_type[idx]);
      if (_type[idx] == P_INT64) {
-        VLOG(2) << "prepare int feed " << name << " shape size "
-                << int_shape[vec_idx].size();
        tensor->mutable_int64_data()->Resize(total_number, 0);
        memcpy(
            tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
-        vec_idx++;
      } else {
-        VLOG(2) << "prepare int32 feed " << name << " shape size "
-                << int_shape[vec_idx].size();
        tensor->mutable_int_data()->Resize(total_number, 0);
        memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
-        vec_idx++;
      }
+      vec_idx++;
    }
    VLOG(2) << "batch [" << bi << "] "

--- a/doc/DOCKER_IMAGES.md
+++ b/doc/DOCKER_IMAGES.md
@@ -29,10 +29,12 @@ You can get images in two ways:
 Runtime images cannot be used for compilation.
 If you want to customize your Serving based on source code, use the version with the suffix - devel.
+**The cuda10.1-cudnn7-gcc54 image is not ready yet; if you need it, build it from the corresponding Dockerfile.**
 |                         Description                          |   OS    |             TAG              |                          Dockerfile                          |
 | :----------------------------------------------------------: | :-----: | :--------------------------: | :----------------------------------------------------------: |
 |                       CPU development                        | Ubuntu16 |         latest-devel         |        [Dockerfile.devel](../tools/Dockerfile.devel)         |
-|              GPU (cuda10.1-cudnn7-tensorRT6-gcc54) development               | Ubuntu16 | latest-cuda10.1-cudnn7-gcc54-devel | [Dockerfile.cuda10.1-cudnn7-gcc54.devel](../tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel) |
+|              GPU (cuda10.1-cudnn7-tensorRT6-gcc54) development               | Ubuntu16 | latest-cuda10.1-cudnn7-gcc54-devel (not ready) | [Dockerfile.cuda10.1-cudnn7-gcc54.devel](../tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel) |
 |              GPU (cuda10.1-cudnn7-tensorRT6) development               | Ubuntu16 | latest-cuda10.1-cudnn7-devel | [Dockerfile.cuda10.1-cudnn7.devel](../tools/Dockerfile.cuda10.1-cudnn7.devel) |
 |              GPU (cuda10.2-cudnn8-tensorRT7) development               | Ubuntu16 | latest-cuda10.2-cudnn8-devel | [Dockerfile.cuda10.2-cudnn8.devel](../tools/Dockerfile.cuda10.2-cudnn8.devel) |
 |              GPU (cuda11-cudnn8-tensorRT7) development               | Ubuntu18 | latest-cuda11-cudnn8-devel | [Dockerfile.cuda11-cudnn8.devel](../tools/Dockerfile.cuda11-cudnn8.devel) |
@@ -65,7 +67,7 @@ Develop Images:
 |    CPU   | >=0.5.0 | 0.6.0-devel                 | Ubuntu 16 |  8.2.0       |
 |          | <=0.4.0 | 0.4.0-devel                  | CentOS 7  | 4.8.5       |
 | Cuda10.1 | >=0.5.0 | 0.6.0-cuda10.1-cudnn7-devel  | Ubuntu 16 |   8.2.0       |
-|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
+|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel (not ready)  | Ubuntu 16 |  5.4.0 |
 |          | <=0.4.0 | 0.6.0-cuda10.1-cudnn7-devel    | CentOS 7  | 4.8.5     |
 | Cuda10.2 | >=0.5.0 | 0.6.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |

--- a/doc/DOCKER_IMAGES_CN.md
+++ b/doc/DOCKER_IMAGES_CN.md
@@ -31,11 +31,12 @@
 若需要基于源代码二次开发编译，请使用后缀为-devel的版本。
 **在TAG列，latest也可以替换成对应的版本号，例如0.5.0/0.4.1等，但需要注意的是，部分开发环境随着某个版本迭代才增加，因此并非所有环境都有对应的版本号可以使用。**
+**cuda10.1-cudnn7-gcc54环境尚未同步到镜像仓库，如果您需要相关镜像请运行相关dockerfile**
 |                         镜像选择                         |   操作系统    |             TAG              |                          Dockerfile                          |
 | :----------------------------------------------------------: | :-----: | :--------------------------: | :----------------------------------------------------------: |
 |                       CPU development                        | Ubuntu16 |         latest-devel         |        [Dockerfile.devel](../tools/Dockerfile.devel)         |
-|              GPU (cuda10.1-cudnn7-tensorRT6-gcc54) development               | Ubuntu16 | latest-cuda10.1-cudnn7-gcc54-devel | [Dockerfile.cuda10.1-cudnn7-gcc54.devel](../tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel) |
+|              GPU (cuda10.1-cudnn7-tensorRT6-gcc54) development               | Ubuntu16 | latest-cuda10.1-cudnn7-gcc54-devel (not ready) | [Dockerfile.cuda10.1-cudnn7-gcc54.devel](../tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel) |
 |              GPU (cuda10.1-cudnn7-tensorRT6) development               | Ubuntu16 | latest-cuda10.1-cudnn7-devel | [Dockerfile.cuda10.1-cudnn7.devel](../tools/Dockerfile.cuda10.1-cudnn7.devel) |
 |              GPU (cuda10.2-cudnn8-tensorRT7) development               | Ubuntu16 | latest-cuda10.2-cudnn8-devel | [Dockerfile.cuda10.2-cudnn8.devel](../tools/Dockerfile.cuda10.2-cudnn8.devel) |
 |              GPU (cuda11-cudnn8-tensorRT7) development               | Ubuntu18 | latest-cuda11-cudnn8-devel | [Dockerfile.cuda11-cudnn8.devel](../tools/Dockerfile.cuda11-cudnn8.devel) |
@@ -71,7 +72,7 @@ registry.baidubce.com/paddlepaddle/serving:xpu-x86 # for x86 xpu user
 |    CPU   | >=0.5.0 | 0.6.0-devel                 | Ubuntu 16 |  8.2.0       |
 |          | <=0.4.0 | 0.4.0-devel                  | CentOS 7  | 4.8.5       |
 | Cuda10.1 | >=0.5.0 | 0.6.0-cuda10.1-cudnn7-devel  | Ubuntu 16 |   8.2.0       |
-|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel  | Ubuntu 16 |  5.4.0 |
+|          | 0.6.0   | 0.6.0-cuda10.1-cudnn7-gcc54-devel (not ready)  | Ubuntu 16 |  5.4.0 |
 |          | <=0.4.0 | 0.6.0-cuda10.1-cudnn7-devel    | CentOS 7  | 4.8.5     |
 | Cuda10.2 | >=0.5.0 | 0.6.0-cuda10.2-cudnn8-devel  | Ubuntu 16 |   8.2.0       |
 |          | <=0.4.0 | Nan                          | Nan       | Nan         |

--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -356,7 +356,8 @@ class Client(object):
                        int_feed_names.append(key)
                        shape_lst = []
                        if batch == False:
-                            feed_i[key] = feed_i[key][np.newaxis, :]
+                            feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
+                                1, axis=0)
                        if isinstance(feed_i[key], np.ndarray):
                            shape_lst.extend(list(feed_i[key].shape))
                            int_shape.append(shape_lst)
@@ -369,10 +370,10 @@ class Client(object):
                            int_lod_slot_batch.append([])
                    if isinstance(feed_i[key], np.ndarray):
-                        int_slot.append(feed_i[key])
+                        int_slot.append(np.ascontiguousarray(feed_i[key]))
                        self.has_numpy_input = True
                    else:
-                        int_slot.append(feed_i[key])
+                        int_slot.append(np.ascontiguousarray(feed_i[key]))
                        self.all_numpy_input = False
                elif self.feed_types_[key] in float_type:
@@ -380,7 +381,8 @@ class Client(object):
                        float_feed_names.append(key)
                        shape_lst = []
                        if batch == False:
-                            feed_i[key] = feed_i[key][np.newaxis, :]
+                            feed_i[key] = np.expand_dims(feed_i[key], 0).repeat(
+                                1, axis=0)
                        if isinstance(feed_i[key], np.ndarray):
                            shape_lst.extend(list(feed_i[key].shape))
                            float_shape.append(shape_lst)
@@ -393,10 +395,10 @@ class Client(object):
                            float_lod_slot_batch.append([])
                    if isinstance(feed_i[key], np.ndarray):
-                        float_slot.append(feed_i[key])
+                        float_slot.append(np.ascontiguousarray(feed_i[key]))
                        self.has_numpy_input = True
                    else:
-                        float_slot.append(feed_i[key])
+                        float_slot.append(np.ascontiguousarray(feed_i[key]))
                        self.all_numpy_input = False
                #if input is string, feed is not numpy.
                elif self.feed_types_[key] in string_type:
@@ -408,7 +410,7 @@ class Client(object):
                                key)])
                        else:
                            string_lod_slot_batch.append([])
-                    string_slot.append(feed_i[key])
+                    string_slot.append(np.ascontiguousarray(feed_i[key]))
                    self.has_numpy_input = True
            int_slot_batch.append(int_slot)
            int_lod_slot_batch.append(int_lod_slot)
@@ -626,6 +628,7 @@ class MultiLangClient(object):
                        raise Exception("error tensor value type.")
                else:
                    raise Exception("var must be list or ndarray.")
+                data = np.ascontiguousarray(data)
                tensor.data = data.tobytes()
            tensor.shape.extend(list(var.shape))
            if "{}.lod".format(name) in feed.keys():
@@ -700,7 +703,7 @@ class MultiLangClient(object):
        if batch is False:
            for key in feed:
                if ".lod" not in key:
-                    feed[key] = feed[key][np.newaxis, :]
+                    feed[key] = np.expand_dims(feed[key], 0).repeat(1, axis=0)
        if not asyn:
            try:
                self.profile_.record('py_prepro_0')

--- a/python/paddle_serving_server/rpc_service.py
+++ b/python/paddle_serving_server/rpc_service.py
@@ -126,7 +126,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                    else:
                        raise Exception("error type.")
                data.shape = list(feed_inst.tensor_array[idx].shape)
-                feed_dict[name] = data
+                feed_dict[name] = np.ascontiguousarray(data)
                if len(var.lod) > 0:
                    feed_dict["{}.lod".format(name)] = var.lod
            feed_batch.append(feed_dict)