Merge pull request #709 from MRXLT/ce-script

fix cube demo

Merge pull request #709 from MRXLT/ce-script
fix cube demo
d198eb35 · MRXLT · GitHub · b9782cd9 · 5735cbb9 · d198eb35
5 changed files
--- a/python/examples/criteo_ctr_with_cube/benchmark.py
+++ b/python/examples/criteo_ctr_with_cube/benchmark.py
@@ -29,6 +29,7 @@ args = benchmark_args()
 def single_func(idx, resource):
    client = Client()
+    print([resource["endpoint"][idx % len(resource["endpoint"])]])
    client.load_client_config('ctr_client_conf/serving_client_conf.prototxt')
    client.connect(['127.0.0.1:9292'])
    batch = 1
@@ -40,27 +41,29 @@ def single_func(idx, resource):
    ]
    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
                                  batch, buf_size)
-    args.batch_size = 1
    if args.request == "rpc":
        fetch = ["prob"]
-        print("Start Time")
        start = time.time()
        itr = 1000
        for ei in range(itr):
-            if args.batch_size == 1:
+            if args.batch_size > 0:
+                feed_batch = []
+                for bi in range(args.batch_size):
                    data = reader().next()
                    feed_dict = {}
                    feed_dict['dense_input'] = data[0][0]
                    for i in range(1, 27):
-                    feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i]
+                        feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][
-                result = client.predict(feed=feed_dict, fetch=fetch)
+                            i]
+                    feed_batch.append(feed_dict)
+                result = client.predict(feed=feed_batch, fetch=fetch)
            else:
                print("unsupport batch size {}".format(args.batch_size))
    elif args.request == "http":
        raise ("Not support http service.")
    end = time.time()
-    qps = itr / (end - start)
+    qps = itr * args.batch_size / (end - start)
    return [[end - start, qps]]
@@ -70,6 +73,7 @@ if __name__ == '__main__':
    #result = single_func(0, {"endpoint": endpoint_list})
    result = multi_thread_runner.run(single_func, args.thread,
                                     {"endpoint": endpoint_list})
+    print(result)
    avg_cost = 0
    qps = 0
    for i in range(args.thread):

--- a/python/examples/criteo_ctr_with_cube/benchmark.sh
+++ b/python/examples/criteo_ctr_with_cube/benchmark.sh
 rm profile_log
-batch_size=1
+export FLAGS_profile_client=1
+export FLAGS_profile_server=1
 for thread_num in 1 2 4 8 16
 do
-    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --model ctr_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+for batch_size in 1 4 16 64 256
+do
+    $PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
+    echo "batch size : $batch_size"
+    echo "thread num : $thread_num"
    echo "========================================"
    echo "batch size : $batch_size" >> profile_log
    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
    tail -n 2 profile >> profile_log
 done
+done
--- a/python/examples/criteo_ctr_with_cube/benchmark_batch.py
+++ b/python/examples/criteo_ctr_with_cube/benchmark_batch.py
-# -*- coding: utf-8 -*-
-#
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-from paddle_serving_client import Client
-import sys
-import os
-import criteo as criteo
-import time
-from paddle_serving_client.utils import MultiThreadRunner
-from paddle_serving_client.utils import benchmark_args
-from paddle_serving_client.metric import auc
-args = benchmark_args()
-def single_func(idx, resource):
-    client = Client()
-    print([resource["endpoint"][idx % len(resource["endpoint"])]])
-    client.load_client_config('ctr_client_conf/serving_client_conf.prototxt')
-    client.connect(['127.0.0.1:9292'])
-    batch = 1
-    buf_size = 100
-    dataset = criteo.CriteoDataset()
-    dataset.setup(1000001)
-    test_filelists = [
-        "./raw_data/part-%d" % x for x in range(len(os.listdir("./raw_data")))
-    ]
-    reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:],
-                                  batch, buf_size)
-    if args.request == "rpc":
-        fetch = ["prob"]
-        start = time.time()
-        itr = 1000
-        for ei in range(itr):
-            if args.batch_size > 1:
-                feed_batch = []
-                for bi in range(args.batch_size):
-                    data = reader().next()
-                    feed_dict = {}
-                    feed_dict['dense_input'] = data[0][0]
-                    for i in range(1, 27):
-                        feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][
-                            i]
-                    feed_batch.append(feed_dict)
-                result = client.predict(feed=feed_batch, fetch=fetch)
-            else:
-                print("unsupport batch size {}".format(args.batch_size))
-    elif args.request == "http":
-        raise ("Not support http service.")
-    end = time.time()
-    qps = itr * args.batch_size / (end - start)
-    return [[end - start, qps]]
-if __name__ == '__main__':
-    multi_thread_runner = MultiThreadRunner()
-    endpoint_list = ["127.0.0.1:9292"]
-    #result = single_func(0, {"endpoint": endpoint_list})
-    result = multi_thread_runner.run(single_func, args.thread,
-                                     {"endpoint": endpoint_list})
-    print(result)
-    avg_cost = 0
-    qps = 0
-    for i in range(args.thread):
-        avg_cost += result[0][i * 2 + 0]
-        qps += result[0][i * 2 + 1]
-    avg_cost = avg_cost / args.thread
-    print("average total cost {} s.".format(avg_cost))
-    print("qps {} ins/s".format(qps))
--- a/python/examples/criteo_ctr_with_cube/benchmark_batch.sh
+++ b/python/examples/criteo_ctr_with_cube/benchmark_batch.sh
-rm profile_log
-for thread_num in 1 2 4 8 16
-do
-for batch_size in 1 2 4 8 16 32 64 128 256 512
-do
-    $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
-    echo "========================================"
-    echo "batch size : $batch_size" >> profile_log
-    $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
-    tail -n 2 profile >> profile_log
-done
-done
--- a/python/examples/criteo_ctr_with_cube/cube_prepare.sh
+++ b/python/examples/criteo_ctr_with_cube/cube_prepare.sh
@@ -16,7 +16,5 @@
 mkdir -p cube_model
 mkdir -p cube/data
-./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature  
 ./cube/cube-builder -dict_name=test_dict -job_mode=base -last_version=0 -cur_version=0 -depend_version=0 -input_path=./cube_model -output_path=${PWD}/cube/data -shard_num=1  -only_build=false
-mv ./cube/data/0_0/test_dict_part0/* ./cube/data/
 cd cube && ./cube