Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into java-sdk

36916674 · barrierye · a5fa06d1 · 9a65a914 · 36916674 · 36916674
6 changed files
--- a/core/cube/cube-api/src/cube_cli.cpp
+++ b/core/cube/cube-api/src/cube_cli.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 #include <gflags/gflags.h>
+#include <algorithm>
 #include <atomic>
 #include <fstream>
 #include <thread>  //NOLINT
@@ -33,7 +34,7 @@ std::atomic<int> g_concurrency(0);
 std::vector<std::vector<uint64_t>> time_list;
 std::vector<uint64_t> request_list;
-int turns = 1000000 / FLAGS_batch;
+int turns = 1000;
 namespace {
 inline uint64_t time_diff(const struct timeval& start_time,
@@ -94,14 +95,15 @@ int run(int argc, char** argv, int thread_id) {
  uint64_t file_size = key_list.size();
  uint64_t index = 0;
  uint64_t request = 0;
  while (g_concurrency.load() >= FLAGS_thread_num) {
  }
  g_concurrency++;
  time_list[thread_id].resize(turns);
-  while (index < file_size) {
+  while (request < turns) {
    // uint64_t key = strtoul(buffer, NULL, 10);
+    if (index >= file_size) {
+      index = 0;
+    }
    keys.push_back(key_list[index]);
    index += 1;
    int ret = 0;
@@ -160,7 +162,7 @@ int run_m(int argc, char** argv) {
  uint64_t sum_time = 0;
  uint64_t max_time = 0;
  uint64_t min_time = 1000000;
-  uint64_t request_num = 0;
+  std::vector<uint64_t> all_time_list;
  for (int i = 0; i < thread_num; i++) {
    for (int j = 0; j < request_list[i]; j++) {
      sum_time += time_list[i][j];
@@ -170,19 +172,28 @@ int run_m(int argc, char** argv) {
      if (time_list[i][j] < min_time) {
        min_time = time_list[i][j];
      }
+      all_time_list.push_back(time_list[i][j]);
    }
-    request_num += request_list[i];
  }
+  std::sort(all_time_list.begin(), all_time_list.end());
  uint64_t mean_time = sum_time / (thread_num * turns);
  uint64_t main_time = time_diff(main_start, main_end);
-  LOG(INFO) << "\n"
+  uint64_t request_num = turns * thread_num;
+  LOG(INFO)
+      << "\n"
      << thread_num << " thread seek cost"
-            << "\navg = " << std::to_string(mean_time)
+      << "\navg: " << std::to_string(mean_time) << "\n50 percent: "
-            << "\nmax = " << std::to_string(max_time)
+      << std::to_string(all_time_list[static_cast<int>(0.5 * request_num)])
-            << "\nmin = " << std::to_string(min_time);
+      << "\n80 percent: "
-  LOG(INFO) << "\ntotal_request = " << std::to_string(request_num)
+      << std::to_string(all_time_list[static_cast<int>(0.8 * request_num)])
-            << "\nspeed = " << std::to_string(request_num * 1000000 /
+      << "\n90 percent: "
-                                              main_time)  // mean_time us
+      << std::to_string(all_time_list[static_cast<int>(0.9 * request_num)])
+      << "\n99 percent: "
+      << std::to_string(all_time_list[static_cast<int>(0.99 * request_num)])
+      << "\n99.9 percent: "
+      << std::to_string(all_time_list[static_cast<int>(0.999 * request_num)])
+      << "\ntotal_request: " << std::to_string(request_num) << "\nspeed: "
+      << std::to_string(turns * 1000000 / main_time)  // mean_time us
      << " query per second";
  return 0;
 }

--- a/doc/CUBE_QUANT.md
+++ b/doc/CUBE_QUANT.md
@@ -42,7 +42,7 @@ cd python/examples/criteo_ctr_with_cube
 python local_train.py
 cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
-sh cube_prepare_quant.sh &
+sh cube_quant_prepare.sh &
 python test_server_quant.py ctr_serving_model_kv &
 python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
 ```

--- a/doc/CUBE_QUANT_CN.md
+++ b/doc/CUBE_QUANT_CN.md
@@ -42,7 +42,7 @@ cd python/examples/criteo_ctr_with_cube
 python local_train.py
 cp ../../../build_server/core/predictor/seq_generator seq_generator
 cp ../../../build_server/output/bin/cube* ./cube/
-sh cube_prepare_quant.sh &
+sh cube_quant_prepare.sh &
 python test_server_quant.py ctr_serving_model_kv &
 python test_client.py ctr_client_conf/serving_client_conf.prototxt ./raw_data
 ```

--- a/python/examples/criteo_ctr_with_cube/README.md
+++ b/python/examples/criteo_ctr_with_cube/README.md
@@ -27,7 +27,7 @@ mv cube_app/cube* ./cube/
 sh cube_prepare.sh &
 ```
-Here, the sparse parameter is loaded by cube sparse parameter indexing service Cube，for more details please read [Cube: Sparse Parameter Indexing Service (Local Mode)](../../../doc/CUBE_LOCAL.md)
+Here, the sparse parameter is loaded by cube sparse parameter indexing service Cube.
 ### Start RPC Predictor, the number of serving thread is 4（configurable in test_server.py）

--- a/python/examples/criteo_ctr_with_cube/README_CN.md
+++ b/python/examples/criteo_ctr_with_cube/README_CN.md
@@ -25,7 +25,7 @@ mv cube_app/cube* ./cube/
 sh cube_prepare.sh &
 ```
-此处，模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中，关于稀疏参数索引服务Cube的介绍，请阅读[稀疏参数索引服务Cube单机版使用指南](../../../doc/CUBE_LOCAL_CN.md)
+此处，模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中。
 ### 启动RPC预测服务，服务端线程数为4（可在test_server.py配置）

--- a/python/examples/criteo_ctr_with_cube/benchmark_cube.sh
+++ b/python/examples/criteo_ctr_with_cube/benchmark_cube.sh
 rm profile_log
-wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz --no-check-certificate
+#wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz --no-check-certificate
-tar xf ctr_cube_unittest.tar.gz
+#tar xf ctr_cube_unittest.tar.gz
 mv models/ctr_client_conf ./
 mv models/ctr_serving_model_kv ./
 mv models/data ./cube/
-wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz --no-check-certificate
+#wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz --no-check-certificate
-tar xf cube_app.tar.gz
+#tar xf cube_app.tar.gz
 mv cube_app/cube* ./cube/
 sh cube_prepare.sh &
@@ -24,8 +24,7 @@ do
    echo "========================================"
    echo "batch size : $batch_size" >> profile_log
    echo "thread num : $thread_num" >> profile_log
-    tail -n 7 profile | head -n 4 >> profile_log
+    tail -n 8 profile >> profile_log
-    tail -n 2 profile >> profile_log
 done
 done