Merge branch 'develop' into docs_0.5.0

26af11ba · Jiawei Wang · GitHub · 24afa1b8 · 50d0c290 · 26af11ba
4 changed files
--- a/python/examples/criteo_ctr/README.md
+++ b/python/examples/criteo_ctr/README.md
@@ -26,6 +26,6 @@ python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292
 ### RPC Infer

 ```
-python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
+python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
 ```
 the latency will display in the end.
--- a/python/examples/criteo_ctr/README_CN.md
+++ b/python/examples/criteo_ctr/README_CN.md
@@ -26,6 +26,6 @@ python -m paddle_serving_server_gpu.serve --model ctr_serving_model/ --port 9292
 ### 执行预测

 ```
-python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/
+python test_client.py ctr_client_conf/serving_client_conf.prototxt raw_data/part-0
 ```
 预测完毕会输出预测过程的耗时。
--- a/python/examples/criteo_ctr/criteo_reader.py
+++ b/python/examples/criteo_ctr/criteo_reader.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# pylint: disable=doc-string-missing
-
-import sys
-import paddle.fluid.incubate.data_generator as dg
-
-
-class CriteoDataset(dg.MultiSlotDataGenerator):
-    def setup(self, sparse_feature_dim):
-        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        self.cont_max_ = [
-            20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
-        ]
-        self.cont_diff_ = [
-            20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
-        ]
-        self.hash_dim_ = sparse_feature_dim
-        # here, training data are lines with line_index < train_idx_
-        self.train_idx_ = 41256555
-        self.continuous_range_ = range(1, 14)
-        self.categorical_range_ = range(14, 40)
-
-    def _process_line(self, line):
-        features = line.rstrip('\n').split('\t')
-        dense_feature = []
-        sparse_feature = []
-        for idx in self.continuous_range_:
-            if features[idx] == '':
-                dense_feature.append(0.0)
-            else:
-                dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \
-                                     self.cont_diff_[idx - 1])
-        for idx in self.categorical_range_:
-            sparse_feature.append(
-                [hash(str(idx) + features[idx]) % self.hash_dim_])
-
-        return dense_feature, sparse_feature, [int(features[0])]
-
-    def infer_reader(self, filelist, batch, buf_size):
-        def local_iter():
-            for fname in filelist:
-                with open(fname.strip(), "r") as fin:
-                    for line in fin:
-                        dense_feature, sparse_feature, label = self._process_line(
-                            line)
-                        #yield dense_feature, sparse_feature, label
-                        yield [dense_feature] + sparse_feature + [label]
-
-        import paddle
-        batch_iter = paddle.batch(
-            paddle.reader.shuffle(
-                local_iter, buf_size=buf_size),
-            batch_size=batch)
-        return batch_iter
-
-    def generate_sample(self, line):
-        def data_iter():
-            dense_feature, sparse_feature, label = self._process_line(line)
-            feature_name = ["dense_input"]
-            for idx in self.categorical_range_:
-                feature_name.append("C" + str(idx - 13))
-            feature_name.append("label")
-            yield zip(feature_name, [dense_feature] + sparse_feature + [label])
-
-        return data_iter
-
-
-if __name__ == "__main__":
-    criteo_dataset = CriteoDataset()
-    criteo_dataset.setup(int(sys.argv[1]))
-    criteo_dataset.run_from_stdin()
--- a/python/examples/criteo_ctr/test_client.py
+++ b/python/examples/criteo_ctr/test_client.py
@@ -14,43 +14,63 @@
 # pylint: disable=doc-string-missing

 from paddle_serving_client import Client
-import paddle
 import sys
 import os
 import time
-import criteo_reader as criteo
 from paddle_serving_client.metric import auc
 import numpy as np
 import sys

+class CriteoReader(object):  # turns one tab-separated Criteo line into hashed sparse feature ids
+    def __init__(self, sparse_feature_dim):  # sparse_feature_dim: modulus applied to the hashed categorical values
+        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]  # subtracted from continuous fields 1..13
+        self.cont_max_ = [  # not read by process_line
+            20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
+        ]
+        self.cont_diff_ = [  # divisor applied after subtracting cont_min_
+            20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
+        ]
+        self.hash_dim_ = sparse_feature_dim
+        # here, training data are lines with line_index < train_idx_
+        self.train_idx_ = 41256555  # not read by process_line
+        self.continuous_range_ = range(1, 14)  # field indices of the 13 continuous columns
+        self.categorical_range_ = range(14, 40)  # field indices of the 26 categorical columns
+
+    def process_line(self, line):  # returns a list of 26 one-element lists of hashed ids
+        features = line.rstrip('\n').split('\t')  # field 0 is not read here; fields 1..39 are
+        dense_feature = []
+        sparse_feature = []
+        for idx in self.continuous_range_:
+            if features[idx] == '':  # an empty continuous field maps to 0.0
+                dense_feature.append(0.0)
+            else:
+                dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \
+                                     self.cont_diff_[idx - 1])  # (value - min) / diff
+        for idx in self.categorical_range_:
+            sparse_feature.append(
+                [hash(str(idx) + features[idx]) % self.hash_dim_])  # NOTE(review): hash() of str is salted per process on Python 3 unless PYTHONHASHSEED is fixed - confirm ids match the ones the model was trained with
+
+        return sparse_feature  # dense_feature is built above but not returned
+
 py_version = sys.version_info[0]

 client = Client()
 client.load_client_config(sys.argv[1])
 client.connect(["127.0.0.1:9292"])
-
+reader = CriteoReader(1000001)  # 1000001 is the modulus for the hashed sparse ids
 batch = 1
 buf_size = 100
-dataset = criteo.CriteoDataset()
-dataset.setup(1000001)
-test_filelists = [
-    "{}/part-%d".format(sys.argv[2]) % x
-    for x in range(len(os.listdir(sys.argv[2])))
-]
-reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:], batch,
-                              buf_size)
 label_list = []
 prob_list = []
 start = time.time()
-for ei in range(1000):
-    if py_version == 2:
-        data = reader().next()
-    else:
-        data = reader().__next__()
+f = open(sys.argv[2], 'r')  # second CLI argument is opened as a single data file; NOTE(review): not wrapped in `with`, so the handle stays open if a request raises
+for ei in range(10):  # one predict request per line, first 10 lines only
+    data = reader.process_line(f.readline())  # NOTE(review): readline() returns '' at EOF, which makes process_line raise IndexError - confirm the file has at least 10 lines
    feed_dict = {}
    for i in range(1, 27):
-        feed_dict["sparse_{}".format(i - 1)] = np.array(data[0][i]).reshape(-1)
-        feed_dict["sparse_{}.lod".format(i - 1)] = [0, len(data[0][i])]
+        feed_dict["sparse_{}".format(i - 1)] = np.array(data[i-1]).reshape(-1)  # data[i-1] is the one-element id list of the i-th categorical column
+        feed_dict["sparse_{}.lod".format(i - 1)] = [0, len(data[i-1])]  # evaluates to [0, 1] here; presumably LoD offsets expected by the client - confirm
    fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
+    print(fetch_map)  # prints the raw result of every request
 end = time.time()
-print(end - start)
+f.close()  # NOTE(review): elapsed time (end - start) is no longer printed, although the README says the latency is displayed - confirm this is intended