Commit aa85253a authored by Dong Daxiang, committed by GitHub

Merge pull request #215 from MRXLT/general-server-bert

add imdb benchmark && bert demo && timeline tools
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
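# benchmark.py: multi-threaded BERT benchmark client. Each thread builds a
# BertService client, takes its shard of the input file, sends one request per
# line via run_general, and reports the elapsed wall-clock time.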
import sys
from paddle_serving_client import Client
from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
import time
from test_bert_client import BertService
def predict(thr_id, resource):
    bc = BertService(
        model_name="bert_chinese_L-12_H-768_A-12",
        max_seq_len=20,
        do_lower_case=True)
    bc.load_client(resource["conf_file"], resource["server_endpoint"])
    thread_num = resource["thread_num"]
    file_list = resource["filelist"]
    line_id = 0
    result = []
    label_list = []
    dataset = []
    for fn in file_list:
        fin = open(fn)
        for line in fin:
            if line_id % thread_num == thr_id - 1:
                dataset.append(line.strip())
            line_id += 1
        fin.close()

    start = time.time()
    fetch = ["pooled_output"]
    for inst in dataset:
        fetch_map = bc.run_general([[inst]], fetch)
        result.append(fetch_map["pooled_output"])
    end = time.time()
    return [result, label_list, [end - start]]


if __name__ == '__main__':
    conf_file = sys.argv[1]
    data_file = sys.argv[2]
    thread_num = sys.argv[3]
    resource = {}
    resource["conf_file"] = conf_file
    resource["server_endpoint"] = ["127.0.0.1:9293"]
    resource["filelist"] = [data_file]
    resource["thread_num"] = int(thread_num)
    thread_runner = MultiThreadRunner()
    result = thread_runner.run(predict, int(sys.argv[3]), resource)
    print("total time {} s".format(sum(result[-1]) / len(result[-1])))
rm profile_log
for thread_num in 1 4 8 12 16 20 24
do
$PYTHONROOT/bin/python benchmark.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num $batch_size > profile 2>&1
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
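# benchmark_batch.py: batched variant of benchmark.py. Each thread groups its
# shard of the input file into batches of batch_size and sends them via
# run_batch_general.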
import sys
from paddle_serving_client import Client
from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
import time
from test_bert_client import BertService
def predict(thr_id, resource, batch_size):
    bc = BertService(
        model_name="bert_chinese_L-12_H-768_A-12",
        max_seq_len=20,
        do_lower_case=True)
    bc.load_client(resource["conf_file"], resource["server_endpoint"])
    thread_num = resource["thread_num"]
    file_list = resource["filelist"]
    line_id = 0
    result = []
    label_list = []
    dataset = []
    for fn in file_list:
        fin = open(fn)
        for line in fin:
            if line_id % thread_num == thr_id - 1:
                dataset.append(line.strip())
            line_id += 1
        fin.close()

    start = time.time()
    fetch = ["pooled_output"]
    batch = []
    for inst in dataset:
        batch.append([inst])
        if len(batch) == batch_size:
            fetch_map_batch = bc.run_batch_general(batch, fetch)
            result.append(fetch_map_batch)
            batch = []
    if batch:
        # send the final partial batch so no instance is skipped
        result.append(bc.run_batch_general(batch, fetch))
    end = time.time()
    return [result, label_list, [end - start]]
if __name__ == '__main__':
    conf_file = sys.argv[1]
    data_file = sys.argv[2]
    thread_num = sys.argv[3]
    batch_size = int(sys.argv[4])
    resource = {}
    resource["conf_file"] = conf_file
    resource["server_endpoint"] = ["127.0.0.1:9293"]
    resource["filelist"] = [data_file]
    resource["thread_num"] = int(thread_num)
    thread_runner = MultiThreadRunner()
    result = thread_runner.run(predict, int(sys.argv[3]), resource, batch_size)
    print("total time {} s".format(sum(result[-1]) / len(result[-1])))
rm profile_log
thread_num=1
for batch_size in 1 4 8 16 32 64 128 256
do
$PYTHONROOT/bin/python benchmark_batch.py serving_client_conf/serving_client_conf.prototxt data.txt $thread_num $batch_size > profile 2>&1
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
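# prepare_model.py: pull the bert_chinese_L-12_H-768_A-12 module from PaddleHub
# and export it with paddle_serving_client.io.save_model, producing the serving
# model files and the client-side prototxt configuration.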
import paddlehub as hub
import paddle.fluid as fluid
import paddle_serving_client.io as serving_io
model_name = "bert_chinese_L-12_H-768_A-12"
module = hub.Module(model_name)
inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
place = fluid.core_avx.CPUPlace()
exe = fluid.Executor(place)
input_ids = inputs["input_ids"]
position_ids = inputs["position_ids"]
segment_ids = inputs["segment_ids"]
input_mask = inputs["input_mask"]
pooled_output = outputs["pooled_output"]
sequence_output = outputs["sequence_output"]
feed_var_names = [
    input_ids.name, position_ids.name, segment_ids.name, input_mask.name
]
target_vars = [pooled_output, sequence_output]
serving_io.save_model("serving_server_model", "serving_client_conf", {
    "input_ids": input_ids,
    "position_ids": position_ids,
    "segment_ids": segment_ids,
    "input_mask": input_mask,
}, {"pooled_output": pooled_output,
    "sequence_output": sequence_output}, program)
# coding:utf-8
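# test_bert_client.py: BertService wraps a PaddleHub ClassifyReader for BERT
# preprocessing and a paddle_serving_client Client for prediction; test() reads
# sentences from stdin and prints their pooled_output embeddings.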
import sys
import numpy as np
import paddlehub as hub
import ujson
import random
from paddlehub.common.logger import logger
import socket
from paddle_serving_client import Client
_ver = sys.version_info
is_py2 = (_ver[0] == 2)
is_py3 = (_ver[0] == 3)
if is_py2:
    import httplib
if is_py3:
    import http.client as httplib
class BertService():
    def __init__(self,
                 profile=False,
                 max_seq_len=128,
                 model_name="bert_uncased_L-12_H-768_A-12",
                 show_ids=False,
                 do_lower_case=True,
                 process_id=0,
                 retry=3,
                 load_balance='round_robin'):
        self.process_id = process_id
        self.reader_flag = False
        self.batch_size = 0
        self.max_seq_len = max_seq_len
        self.profile = profile
        self.model_name = model_name
        self.show_ids = show_ids
        self.do_lower_case = do_lower_case
        self.con_list = []
        self.con_index = 0
        self.load_balance = load_balance
        self.server_list = []
        self.serving_list = []
        self.feed_var_names = ''
        self.retry = retry

        module = hub.Module(name=self.model_name)
        inputs, outputs, program = module.context(
            trainable=True, max_seq_len=self.max_seq_len)
        input_ids = inputs["input_ids"]
        position_ids = inputs["position_ids"]
        segment_ids = inputs["segment_ids"]
        input_mask = inputs["input_mask"]
        self.feed_var_names = input_ids.name + ';' + position_ids.name + ';' + segment_ids.name + ';' + input_mask.name
        self.reader = hub.reader.ClassifyReader(
            vocab_path=module.get_vocab_path(),
            dataset=None,
            max_seq_len=self.max_seq_len,
            do_lower_case=self.do_lower_case)
        self.reader_flag = True
    def load_client(self, config_file, server_addr):
        self.client = Client()
        self.client.load_client_config(config_file)
        self.client.connect(server_addr)

    def run_general(self, text, fetch):
        self.batch_size = len(text)
        data_generator = self.reader.data_generator(
            batch_size=self.batch_size, phase='predict', data=text)
        result = []
        for run_step, batch in enumerate(data_generator(), start=1):
            token_list = batch[0][0].reshape(-1).tolist()
            pos_list = batch[0][1].reshape(-1).tolist()
            sent_list = batch[0][2].reshape(-1).tolist()
            mask_list = batch[0][3].reshape(-1).tolist()
            for si in range(self.batch_size):
                feed = {
                    "input_ids": token_list,
                    "position_ids": pos_list,
                    "segment_ids": sent_list,
                    "input_mask": mask_list
                }
                fetch_map = self.client.predict(feed=feed, fetch=fetch)
        return fetch_map
    def run_batch_general(self, text, fetch):
        self.batch_size = len(text)
        data_generator = self.reader.data_generator(
            batch_size=self.batch_size, phase='predict', data=text)
        result = []
        for run_step, batch in enumerate(data_generator(), start=1):
            token_list = batch[0][0].reshape(-1).tolist()
            pos_list = batch[0][1].reshape(-1).tolist()
            sent_list = batch[0][2].reshape(-1).tolist()
            mask_list = batch[0][3].reshape(-1).tolist()
            feed_batch = []
            for si in range(self.batch_size):
                feed = {
                    "input_ids": token_list[si * self.max_seq_len:(si + 1) *
                                            self.max_seq_len],
                    "position_ids": pos_list[si * self.max_seq_len:(si + 1) *
                                             self.max_seq_len],
                    "segment_ids": sent_list[si * self.max_seq_len:(si + 1) *
                                             self.max_seq_len],
                    "input_mask": mask_list[si * self.max_seq_len:(si + 1) *
                                            self.max_seq_len]
                }
                feed_batch.append(feed)
            fetch_map_batch = self.client.batch_predict(
                feed_batch=feed_batch, fetch=fetch)
        return fetch_map_batch
def test():
    bc = BertService(
        model_name='bert_uncased_L-12_H-768_A-12',
        max_seq_len=20,
        show_ids=False,
        do_lower_case=True)
    server_addr = ["127.0.0.1:9293"]
    config_file = './serving_client_conf/serving_client_conf.prototxt'
    fetch = ["pooled_output"]
    bc.load_client(config_file, server_addr)
    batch_size = 4
    batch = []
    for line in sys.stdin:
        if len(batch) < batch_size:
            batch.append([line.strip()])
        else:
            result = bc.run_batch_general(batch, fetch)
            batch = []
            for r in result:
                for e in r["pooled_output"]:
                    print(e)


if __name__ == '__main__':
    test()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
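# GPU server startup script: chain the general_reader, general_infer, and
# general_response ops, load the model from sys.argv[1], and serve it on GPU 1
# at the port given by sys.argv[2].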
import os
import sys
from paddle_serving_server_gpu import OpMaker
from paddle_serving_server_gpu import OpSeqMaker
from paddle_serving_server_gpu import Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(8)
server.set_memory_optimize(True)
server.set_gpuid(1)
server.load_model_config(sys.argv[1])
port = int(sys.argv[2])
server.prepare_server(workdir="work_dir1", port=port, device="gpu")
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
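# CPU server startup script: same op sequence as the GPU script above, but runs
# on CPU and points at a locally built serving binary via set_local_bin.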
import os
import sys
from paddle_serving_server import OpMaker
from paddle_serving_server import OpSeqMaker
from paddle_serving_server import Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.set_local_bin(
    "~/github/Serving/build_server/core/general-server/serving")
server.load_model_config(sys.argv[1])
port = int(sys.argv[2])
server.prepare_server(workdir="work_dir1", port=port, device="cpu")
server.run_server()
@@ -14,3 +14,83 @@ batch client, with batch size 4
```
cat test.data | python test_client_batch.py inference.conf 4 > result
```
### Benchmark

Device: Intel(R) Xeon(R) Gold 6271 CPU @ 2.60GHz * 48

Model: IMDB-CNN

In each test the client sends a total of 2,500 test samples; the figures in the tables below are the time spent by a single thread, in seconds.

server thread num: 4
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 0.99 | 27.39 | 0.085 | 19.92 | 0.046 | 0.032 | 29.84 |
| 4 | 0.22 | 7.66 | 0.021 | 4.93 | 0.011 | 0.0082 | 8.28 |
| 8 | 0.1 | 6.66 | 0.01 | 2.42 | 0.0038 | 0.0046 | 6.95 |
| 12 | 0.074 | 6.87 | 0.0069 | 1.61 | 0.0059 | 0.0032 | 7.07 |
| 16 | 0.056 | 7.01 | 0.0053 | 1.23 | 0.0029 | 0.0026 | 7.17 |
| 20 | 0.045 | 7.02 | 0.0042 | 0.97 | 0.0023 | 0.002 | 7.15 |
| 24 | 0.039 | 7.012 | 0.0034 | 0.8 | 0.0019 | 0.0016 | 7.12 |
server thread num: 8
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.02 | 28.9 | 0.096 | 20.64 | 0.047 | 0.036 | 31.51 |
| 4 | 0.22 | 7.83 | 0.021 | 5.08 | 0.012 | 0.01 | 8.45 |
| 8 | 0.11 | 4.44 | 0.01 | 2.5 | 0.0059 | 0.0051 | 4.73 |
| 12 | 0.074 | 4.11 | 0.0069 | 1.65 | 0.0039 | 0.0029 | 4.31 |
| 16 | 0.057 | 4.2 | 0.0052 | 1.24 | 0.0029 | 0.0024 | 4.35 |
| 20 | 0.046 | 4.05 | 0.0043 | 1.01 | 0.0024 | 0.0021 | 4.18 |
| 24 | 0.038 | 4.02 | 0.0034 | 0.81 | 0.0019 | 0.0015 | 4.13 |
server thread num: 12
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.02 | 29.47 | 0.098 | 20.95 | 0.048 | 0.038 | 31.96 |
| 4 | 0.21 | 7.36 | 0.022 | 5.01 | 0.011 | 0.0081 | 7.95 |
| 8 | 0.11 | 4.52 | 0.011 | 2.58 | 0.0061 | 0.0051 | 4.83 |
| 12 | 0.072 | 3.25 | 0.0076 | 1.72 | 0.0042 | 0.0038 | 3.45 |
| 16 | 0.059 | 3.93 | 0.0055 | 1.26 | 0.0029 | 0.0023 | 4.1 |
| 20 | 0.047 | 3.79 | 0.0044 | 1.01 | 0.0024 | 0.0021 | 3.92 |
| 24 | 0.041 | 3.76 | 0.0036 | 0.83 | 0.0019 | 0.0017 | 3.87 |
server thread num: 16
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 |
| 4 | 0.22 | 7.41 | 0.023 | 5.01 | 0.011 | 0.0098 | 8.01 |
| 8 | 0.11 | 4.7 | 0.012 | 2.61 | 0.0062 | 0.0049 | 5.01 |
| 12 | 0.081 | 4.69 | 0.0078 | 1.72 | 0.0042 | 0.0035 | 4.91 |
| 16 | 0.058 | 3.46 | 0.0061 | 1.32 | 0.0033 | 0.003 | 3.63 |
| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 |
| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 |
server thread num: 20
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.03 | 28.42 | 0.085 | 20.47 | 0.046 | 0.037 | 30.98 |
| 4 | 0.22 | 7.94 | 0.022 | 5.33 | 0.012 | 0.011 | 8.53 |
| 8 | 0.11 | 4.54 | 0.01 | 2.58 | 0.006 | 0.0046 | 4.84 |
| 12 | 0.079 | 4.54 | 0.0076 | 1.78 | 0.0042 | 0.0039 | 4.76 |
| 16 | 0.059 | 3.41 | 0.0057 | 1.33 | 0.0032 | 0.0027 | 3.58 |
| 20 | 0.051 | 4.33 | 0.0047 | 1.06 | 0.0025 | 0.0023 | 4.48 |
| 24 | 0.043 | 4.51 | 0.004 | 0.88 | 0.0021 | 0.0018 | 4.63 |
server thread num: 24
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ---- | ------ | ------- | ----- |
| 1 | 0.93 | 29.28 | 0.099 | 20.5 | 0.048 | 0.028 | 31.61 |
| 4 | 0.22 | 7.72 | 0.023 | 4.98 | 0.011 | 0.0095 | 8.33 |
| 8 | 0.11 | 4.77 | 0.012 | 2.65 | 0.0062 | 0.0049 | 5.09 |
| 12 | 0.081 | 4.22 | 0.0078 | 1.77 | 0.0042 | 0.0033 | 4.44 |
| 16 | 0.062 | 4.21 | 0.0061 | 1.34 | 0.0032 | 0.0026 | 4.39 |
| 20 | 0.5 | 3.58 | 0.005 | 1.07 | 0.0026 | 0.0023 | 3.72 |
| 24 | 0.043 | 4.27 | 0.0042 | 0.89 | 0.0022 | 0.0018 | 4.4 |
@@ -43,15 +43,13 @@ def predict(thr_id, resource):
     start = time.time()
     fetch = ["acc", "cost", "prediction"]
-    infer_time_list = []
     for inst in dataset:
-        fetch_map = client.predict(feed=inst, fetch=fetch, profile=True)
+        fetch_map = client.predict(feed=inst, fetch=fetch)
         prob.append(fetch_map["prediction"][1])
         label_list.append(label[0])
-        infer_time_list.append(fetch_map["infer_time"])
     end = time.time()
     client.release()
-    return [prob, label_list, [sum(infer_time_list)], [end - start]]
+    return [prob, label_list, [end - start]]
if __name__ == '__main__':
@@ -59,14 +57,11 @@ if __name__ == '__main__':
     data_file = sys.argv[2]
     resource = {}
     resource["conf_file"] = conf_file
-    resource["server_endpoint"] = ["127.0.0.1:9292"]
+    resource["server_endpoint"] = ["127.0.0.1:9293"]
     resource["filelist"] = [data_file]
     resource["thread_num"] = int(sys.argv[3])
     thread_runner = MultiThreadRunner()
     result = thread_runner.run(predict, int(sys.argv[3]), resource)
-    print("thread num {}\ttotal time {}".format(sys.argv[
-        3], sum(result[-1]) / len(result[-1])))
-    print("thread num {}\ttotal time {}".format(sys.argv[
-        3], sum(result[2]) / 1000.0 / 1000.0 / len(result[2])))
+    print("total time {} s".format(sum(result[-1]) / len(result[-1])))
#coding=utf-8
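# show_profile.py: aggregate the PROFILE lines from a client profile log and print
# the average time spent in each stage per thread.
# Usage: python show_profile.py <profile_file> <thread_num>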
import sys
import collections
profile_file = sys.argv[1]
thread_num = sys.argv[2]
time_dict = collections.OrderedDict()
def prase(line):
    profile_list = line.split(" ")
    num = len(profile_list)
    for idx in range(num / 2):
        profile_0_list = profile_list[idx * 2].split(":")
        profile_1_list = profile_list[idx * 2 + 1].split(":")
        if len(profile_0_list[0].split("_")) == 2:
            name = profile_0_list[0].split("_")[0]
        else:
            name = profile_0_list[0].split("_")[0] + "_" + profile_0_list[
                0].split("_")[1]
        cost = long(profile_1_list[1]) - long(profile_0_list[1])
        if name not in time_dict:
            time_dict[name] = cost
        else:
            time_dict[name] += cost


with open(profile_file) as f:
    for line in f.readlines():
        line = line.strip().split("\t")
        if line[0] == "PROFILE":
            prase(line[1])

print("thread num {}".format(thread_num))
for name in time_dict:
    print("{} cost {} s per thread ".format(name, time_dict[name] / (
        1000000.0 * float(thread_num))))
#coding=utf-8
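# timeline_trace.py: convert the PROFILE lines from a profile log into a list of
# Chrome-trace-style begin/end (B/E) events and dump them as JSON.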
import json
import sys
profile_file = sys.argv[1]
def prase(line, counter):
    event_list = line.split(" ")
    trace_list = []
    for event in event_list:
        name, ts = event.split(":")
        name_list = name.split("_")
        ph = "B" if (name_list[-1] == "0") else "E"
        if len(name_list) == 2:
            name = name_list[0]
        else:
            name = name_list[0] + "_" + name_list[1]
        event_dict = {}
        event_dict["name"] = name
        event_dict["tid"] = 0
        event_dict["pid"] = 0
        event_dict["ts"] = ts
        event_dict["ph"] = ph
        trace_list.append(event_dict)
    return trace_list


if __name__ == "__main__":
    profile_file = sys.argv[1]
    trace_file = sys.argv[2]
    all_list = []
    counter = 0
    with open(profile_file) as f:
        for line in f.readlines():
            line = line.strip().split("\t")
            if line[0] == "PROFILE":
                trace_list = prase(line[1], counter)
                counter += 1
                for trace in trace_list:
                    all_list.append(trace)
    trace = json.dumps(all_list, indent=2, separators=(',', ':'))
    with open(trace_file, "w") as f:
        f.write(trace)
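# Example usage (file names are illustrative): generate a profile log with one of
# the benchmark scripts above, then run
#     python timeline_trace.py profile trace.json
# The output uses Chrome trace event fields (name/pid/tid/ts/ph), so trace.json
# can be inspected in a Chrome-trace-compatible timeline viewer such as
# chrome://tracing.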