Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into pyserving

c8288f85 · barrierye · dd458c19 · 65fc3e00 · c8288f85 · c8288f85
18 changed files
--- a/README.md
+++ b/README.md
@@ -184,6 +184,12 @@ Here, `client.predict` function has two arguments. `feed` is a `python dict` wit
 <h2 align="center">Community</h2>
+### User Group in China
+<p align="center"><img width="200" height="300" margin="500" src="./doc/qq.jpeg"/>&#8194;&#8194;&#8194;&#8194;&#8194;<img width="200" height="300"  src="doc/wechat.jpeg"/></p>
+<p align="center">PaddleServing交流QQ群&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;&#8194;PaddleServing微信群</p>    
 ### Slack
 To connect with other users and contributors, welcome to join our [Slack channel](https://paddleserving.slack.com/archives/CUBPKHKMJ)

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -285,22 +285,16 @@ int PredictorClient::batch_predict(
        // int idx = _fetch_name_to_idx[name];
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
-          model._int64_value_map[name].resize(
-              output.insts(0).tensor_array(idx).int64_data_size());
          int size = output.insts(0).tensor_array(idx).int64_data_size();
-          for (int i = 0; i < size; ++i) {
+          model._int64_value_map[name] = std::vector<int64_t>(
-            model._int64_value_map[name][i] =
+              output.insts(0).tensor_array(idx).int64_data().begin(),
-                output.insts(0).tensor_array(idx).int64_data(i);
+              output.insts(0).tensor_array(idx).int64_data().begin() + size);
-          }
        } else {
          VLOG(2) << "fetch var " << name << "type float";
-          model._float_value_map[name].resize(
-              output.insts(0).tensor_array(idx).float_data_size());
          int size = output.insts(0).tensor_array(idx).float_data_size();
-          for (int i = 0; i < size; ++i) {
+          model._float_value_map[name] = std::vector<float>(
-            model._float_value_map[name][i] =
+              output.insts(0).tensor_array(idx).float_data().begin(),
-                output.insts(0).tensor_array(idx).float_data(i);
+              output.insts(0).tensor_array(idx).float_data().begin() + size);
-          }
        }
        idx += 1;
      }
@@ -564,22 +558,16 @@ int PredictorClient::numpy_predict(
        // int idx = _fetch_name_to_idx[name];
        if (_fetch_name_to_type[name] == 0) {
          VLOG(2) << "ferch var " << name << "type int";
-          model._int64_value_map[name].resize(
-              output.insts(0).tensor_array(idx).int64_data_size());
          int size = output.insts(0).tensor_array(idx).int64_data_size();
-          for (int i = 0; i < size; ++i) {
+          model._int64_value_map[name] = std::vector<int64_t>(
-            model._int64_value_map[name][i] =
+              output.insts(0).tensor_array(idx).int64_data().begin(),
-                output.insts(0).tensor_array(idx).int64_data(i);
+              output.insts(0).tensor_array(idx).int64_data().begin() + size);
-          }
        } else {
          VLOG(2) << "fetch var " << name << "type float";
-          model._float_value_map[name].resize(
-              output.insts(0).tensor_array(idx).float_data_size());
          int size = output.insts(0).tensor_array(idx).float_data_size();
-          for (int i = 0; i < size; ++i) {
+          model._float_value_map[name] = std::vector<float>(
-            model._float_value_map[name][i] =
+              output.insts(0).tensor_array(idx).float_data().begin(),
-                output.insts(0).tensor_array(idx).float_data(i);
+              output.insts(0).tensor_array(idx).float_data().begin() + size);
-          }
        }
        idx += 1;
      }

--- a/core/predictor/tools/seq_generator.cpp
+++ b/core/predictor/tools/seq_generator.cpp
@@ -12,13 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
+#include <sys/time.h>
 #include <fstream>
 #include <iostream>
 #include <memory>
+#include <thread>
 #include "core/predictor/framework.pb.h"
 #include "quant.h"
 #include "seq_file.h"
+inline uint64_t time_diff(const struct timeval &start_time,  // returns end_time - start_time in microseconds
+                          const struct timeval &end_time) {
+  return (end_time.tv_sec - start_time.tv_sec) * 1000000 +  // whole seconds scaled to microseconds
+         (end_time.tv_usec - start_time.tv_usec);  // microsecond remainder (may be negative)
+}  // a negative total wraps around when converted to the uint64_t return type
 using paddle::framework::proto::VarType;
 std::map<int, size_t> var_type_size;
 void reg_var_types() {
@@ -100,8 +110,8 @@ int dump_parameter(const char *input_file, const char *output_file) {
  char *value_buf = new char[value_buf_len];
  size_t offset = 0;
  for (int64_t i = 0; i < dims[0]; ++i) {
-    // std::cout << "key_len " << key_len << " value_len " << value_buf_len <<
+    // std::cout << "key_len " << key_len << " value_len " << value_buf_len
-    // std::endl;
+    // << std::endl;
    memcpy(value_buf, tensor_buf + offset, value_buf_len);
    seq_file_writer.write((char *)&i, sizeof(i), value_buf, value_buf_len);
    offset += value_buf_len;
@@ -109,14 +119,14 @@ int dump_parameter(const char *input_file, const char *output_file) {
  return 0;
 }
-int compress_parameter(const char *file1, const char *file2, int bits) {
+float *read_embedding_table(const char *file1, std::vector<int64_t> &dims) {
  std::ifstream is(file1);
  // Step 1: is read version, os write version
  uint32_t version;
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  if (version != 0) {
    std::cout << "Version number " << version << " not supported" << std::endl;
-    return -1;
+    return NULL;
  }
  std::cout << "Version size: " << sizeof(version) << std::endl;
  // Step 2: is read LoD level, os write LoD level
@@ -138,7 +148,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
  is.read(reinterpret_cast<char *>(&version), sizeof(version));
  if (version != 0) {
    std::cout << "Version number " << version << " not supported" << std::endl;
-    return -1;
+    return NULL;
  }
  // Step 4: is read Tensor Data, os write  min/max/quant data
@@ -149,10 +159,10 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
  is.read(reinterpret_cast<char *>(buf.get()), size);
  if (!desc.ParseFromArray(buf.get(), size)) {
    std::cout << "Cannot parse tensor desc" << std::endl;
-    return -1;
+    return NULL;
  }
  // read tensor
-  std::vector<int64_t> dims;
+  // std::vector<int64_t> dims;
  dims.reserve(static_cast<size_t>(desc.dims().size()));
  std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
@@ -164,7 +174,7 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
  if (dims.size() != 2) {
    std::cout << "Parameter dims not 2D" << std::endl;
-    return -1;
+    return NULL;
  }
  size_t numel = 1;
@@ -176,47 +186,96 @@ int compress_parameter(const char *file1, const char *file2, int bits) {
  char *tensor_buf = new char[buf_size];
  is.read(static_cast<char *>(tensor_buf), buf_size);
  float *tensor_float_buf = reinterpret_cast<float *>(tensor_buf);
-  size_t per_line_size = dims[1] * 1 + 2 * sizeof(float);
+  return tensor_float_buf;
-  char *tensor_out = new char[per_line_size * dims[0]];
+}
-  float loss = 0;
+// Quantizes every row of the 2-D float embedding table stored in `file1` to
+// `bits`-bit codes and writes the rows, in index order, to the sequence file
+// `file2`.
+//
+// Each output record is keyed by the int64 row index and holds
+// [xmin (float) | xmax (float) | one byte per element].
+// `n_threads` is clamped to [MIN_THREADS, MAX_THREADS]; the rows are split
+// into `n_threads` equal ranges plus one extra worker for the remainder.
+// Returns 0 on success, -1 if the table cannot be read or is not 2-D.
+int compress_parameter_parallel(const char *file1,
-  float all_loss = 0;
+                                const char *file2,
+                                int bits,
+                                int n_threads) {
+#define MIN_THREADS (1)
+#define MAX_THREADS (80)
+  std::vector<int64_t> dims;
+  float *emb_table = read_embedding_table(file1, dims);
+  if (emb_table == NULL || dims.size() != 2) {
+    return -1;
+  }
+  // int64_t dict_size = dims[0]/100000000;
+  int64_t dict_size = dims[0];
+  int64_t emb_size = dims[1];
+  // Record layout: xmin and xmax as floats, then one byte per element.
+  size_t per_line_size = emb_size * 1 + 2 * sizeof(float);
+  n_threads = std::min(std::max(MIN_THREADS, n_threads), MAX_THREADS);
+  int64_t step = dict_size / n_threads;
+  std::vector<char *> result;
+  // One slot per row. The slots must really exist (resize, not reserve):
+  // the workers assign result[k] and the writer loop below reads it back.
+  result.resize(dict_size);
+  double pow2bits = pow(2, bits);
  std::cout << "Start Quant" << std::endl;
-  SeqFileWriter seq_file_writer(file2);
+  std::vector<std::thread> threads;
+  // Workers 0 .. n_threads-1 each take `step` rows; worker n_threads takes
+  // whatever is left over when dict_size is not a multiple of n_threads.
+  for (int i = 0; i < n_threads + 1; ++i) {
-  size_t offset = 0;
+    threads.push_back(std::thread([=, &result]() {
+      int64_t start = i * step;
-  for (int64_t i = 0; i < dims[0]; ++i) {
+      int64_t end = (i + 1) * step;
+      if (i == n_threads) {
+        if (start == dict_size) {
+          return;
+        }
+        end = dict_size;
+      }
+      printf("THREAD[%d], index [%ld, %ld), start Quant table...\n",
+             i,
+             start,
+             end);
+      struct timeval quant_start;
+      gettimeofday(&(quant_start), NULL);
+      for (int64_t k = start; k < end; ++k) {
        float xmin = 0, xmax = 0, loss = 0;
-    size_t scale = dims[1];
        char *tensor_temp = new char[per_line_size];
        greedy_search(
-        tensor_float_buf + i * dims[1], xmin, xmax, loss, scale, bits);
+            emb_table + k * emb_size, xmin, xmax, loss, emb_size, bits);
-    for (size_t e = 0; e < dims[1]; ++e) {
+        // Scale for the xmin/xmax at which the loss is minimal: the width of
+        // one quantization step when [xmin, xmax] maps onto 0 .. 2^bits - 1.
-      float x = *(tensor_float_buf + i * dims[1] + e);
+        float scale = (xmax - xmin) / (pow2bits - 1);
-      int val = round((x - xmin) / (xmax - xmin) * (pow(2, bits) - 1));
-      val = std::max(0, val);
-      val = std::min((int)pow(2, bits) - 1, val);
        char *min_ptr = tensor_temp;
        char *max_ptr = tensor_temp + sizeof(float);
        memcpy(min_ptr, &xmin, sizeof(float));
        memcpy(max_ptr, &xmax, sizeof(float));
+        for (size_t e = 0; e < emb_size; ++e) {
+          float x = *(emb_table + k * emb_size + e);
+          // A constant row (xmax == xmin) has scale 0; store code 0 rather
+          // than dividing by zero.
+          int val = 0;
+          if (scale > 0) {
+            val = round((x - xmin) / scale);
+          }
+          val = std::max(0, val);
+          val = std::min((int)pow2bits - 1, val);
          *(tensor_temp + 2 * sizeof(float) + e) = val;
-      float unit = (xmax - xmin) / pow(2, bits);
-      float trans_val = unit * val + xmin;
        }
-    seq_file_writer.write((char *)&i, sizeof(i), tensor_temp, per_line_size);
+        result[k] = tensor_temp;
+        if ((k - start) % 10000 == 0) {
+          printf("THREAD[%d], handle line: %ld\n", i, k - start);
+        }
+      }
+      struct timeval quant_end;
+      gettimeofday(&(quant_end), NULL);
+      printf("THREAD[%d], Quantization finished, cost: %lu us!!!\n",
+             i,
+             time_diff(quant_start, quant_end));
+    }));
+  }
+  for (auto &thread : threads) {
+    thread.join();
+  }
+  // All rows are quantized, so the float table is no longer needed. It was
+  // allocated as a char array by read_embedding_table.
+  delete[] reinterpret_cast<char *>(emb_table);
+  // Write sequentially so that records appear in row-index order.
+  SeqFileWriter seq_file_writer(file2);
+  for (int64_t i = 0; i < dict_size; i++) {
+    seq_file_writer.write((char *)&i, sizeof(i), result[i], per_line_size);
+    // The record has been handed to the writer; release the row buffer.
+    delete[] result[i];
  }
  return 0;
 }
 int main(int argc, char **argv) {
-  if (argc < 3 || argc > 4) {
+  if (argc < 3 || argc > 5) {
-    std::cout << "Usage: if no compress, please follow:" << std::endl;
+    std::cout << "Usage:" << std::endl;
-    std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
+    std::cout << "if no compress, please follow:" << std::endl;
+    std::cout << "  seq_generator PARAMETER_FILE OUTPUT_FILE\n" << std::endl;
    std::cout << "if compress, please follow: " << std::endl;
-    std::cout << "seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS"
+    std::cout << "  seq_generator PARAMETER_FILE OUTPUT_FILE QUANT_BITS "
+                 "[N_THREADS]"
              << std::endl;
-    std::cout << "Now it only support 8 bit." << std::endl;
+    std::cout << "  Now it only support 8 bit." << std::endl;
    return -1;
  }
  reg_var_types();
@@ -227,7 +286,13 @@ int main(int argc, char **argv) {
  }
  if (argc == 4) {
    std::cout << "generate compressed sparse param sequence file" << std::endl;
-    compress_parameter(argv[1], argv[2], atoi(argv[3]));
+    compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), 1);
+    return 0;
+  }
+  if (argc == 5) {
+    std::cout << "parallel generate compressed sparse param sequence file"
+              << std::endl;
+    compress_parameter_parallel(argv[1], argv[2], atoi(argv[3]), atoi(argv[4]));
    return 0;
  }
 }

--- a/doc/COMPILE.md
+++ b/doc/COMPILE.md
@@ -43,7 +43,7 @@ In the default centos7 image we provide, the Python path is `/usr/bin/python`. I
 ### Integrated CPU version paddle inference library
 ``` shell
-mkdir build && cd build
+mkdir server-build-cpu && cd server-build-cpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
 make -j10
 ```
@@ -53,7 +53,7 @@ you can execute `make install` to put targets under directory `./output`, you ne
 ### Integrated GPU version paddle inference library
 ``` shell
-mkdir build && cd build
+mkdir server-build-gpu && cd server-build-gpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
 make -j10
 ```
@@ -65,7 +65,7 @@ execute `make install` to put targets under directory `./output`
 ## Compile Client
 ``` shell
-mkdir build && cd build
+mkdir client-build && cd client-build
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
 make -j10
 ```
@@ -75,7 +75,7 @@ execute `make install` to put targets under directory `./output`
 ## Compile the App
 ```bash
-mkdir build && cd build
+mkdir app-build && cd app-build
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
 make
 ```

--- a/doc/COMPILE_CN.md
+++ b/doc/COMPILE_CN.md
@@ -43,7 +43,7 @@ export PYTHONROOT=/usr/
 ### 集成CPU版本Paddle Inference Library
 ``` shell
-mkdir build && cd build
+mkdir server-build-cpu && cd server-build-cpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
 make -j10
 ```
@@ -53,7 +53,7 @@ make -j10
 ### 集成GPU版本Paddle Inference Library
 ``` shell
-mkdir build && cd build
+mkdir server-build-gpu && cd server-build-gpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
 make -j10
 ```
@@ -65,7 +65,7 @@ make -j10
 ## 编译Client部分
 ``` shell
-mkdir build && cd build
+mkdir client-build && cd client-build
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
 make -j10
 ```
@@ -75,7 +75,7 @@ make -j10
 ## 编译App部分
 ```bash
-mkdir build && cd build
+mkdir app-build && cd app-build
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
 make
 ```

--- a/doc/LATEST_PACKAGES.md
+++ b/doc/LATEST_PACKAGES.md
@@ -3,45 +3,45 @@
 ## CPU server
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py2-none-any.whl
 ```
 ## GPU server
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py2-none-any.whl
 ```
 ## Client
 ### Python 3.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp37-none-manylinux1_x86_64.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp37-none-any.whl
 ```
 ### Python 3.6
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp36-none-manylinux1_x86_64.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp36-none-any.whl
 ```
 ### Python 2.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp27-none-manylinux1_x86_64.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp27-none-any.whl
 ```
 ## App
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py2-none-any.whl
 ```
--- a/doc/qq.jpeg
+++ b/doc/qq.jpeg
--- a/doc/wechat.jpeg
+++ b/doc/wechat.jpeg
--- a/python/paddle_serving_app/version.py
+++ b/python/paddle_serving_app/version.py
@@ -12,4 +12,4 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving App version string """
-serving_app_version = "0.1.0"
+serving_app_version = "0.1.1"
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -391,7 +391,13 @@ class MultiLangClient(object):
        self._parse_model_config(path)
    def connect(self, endpoint):
-        self.channel_ = grpc.insecure_channel(endpoint[0])  #TODO
+        """Open an insecure gRPC channel to endpoint[0] and build the stub."""
+        # Allow messages of up to 512 MB in both directions, see
+        # https://github.com/tensorflow/serving/issues/1382
+        options = [('grpc.max_receive_message_length', 512 * 1024 * 1024),
+                   ('grpc.max_send_message_length', 512 * 1024 * 1024)]
+        self.channel_ = grpc.insecure_channel(
+            endpoint[0], options=options)  #TODO
        self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub(
            self.channel_)

--- a/python/paddle_serving_client/version.py
+++ b/python/paddle_serving_client/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.0"
+serving_client_version = "0.3.1"
-serving_server_version = "0.3.0"
+serving_server_version = "0.3.1"
-module_proto_version = "0.3.0"
+module_proto_version = "0.3.1"
--- a/python/paddle_serving_server/version.py
+++ b/python/paddle_serving_server/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.0"
+serving_client_version = "0.3.1"
-serving_server_version = "0.3.0"
+serving_server_version = "0.3.1"
-module_proto_version = "0.3.0"
+module_proto_version = "0.3.1"
--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
@@ -12,6 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.0"
+serving_client_version = "0.3.1"
-serving_server_version = "0.3.0"
+serving_server_version = "0.3.1"
-module_proto_version = "0.3.0"
+module_proto_version = "0.3.1"
--- a/python/pipeline/channel.py
+++ b/python/pipeline/channel.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
--- a/python/pipeline/pipeline_server.py
+++ b/python/pipeline/pipeline_server.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
--- a/python/pipeline/profiler.py
+++ b/python/pipeline/profiler.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
--- a/tools/python_tag.py
+++ b/tools/python_tag.py
@@ -15,6 +15,6 @@
 from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
 import re
 with open("setup.cfg", "w") as f:
-    line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=manylinux1_x86_64".format(
+    line = "[bdist_wheel]\npython-tag={0}{1}".format(get_abbr_impl(),
-        get_abbr_impl(), get_impl_ver())
+                                                     get_impl_ver())
    f.write(line)