Merge branch 'develop' into 'develop'

fix See merge request !59

Merge branch 'develop' into 'develop'
fix See merge request !59
641a7ef9 · xujiaqi01 · 17f07fe3 · b95b2fed · 17f07fe3 · 641a7ef9
隐藏空白更改
内联并排

Showing 2 changed files with 2 additions and 61 deletions

models/rank/criteo_reader.py models/rank/criteo_reader.py +0 -61

models/rank/readme.md models/rank/readme.md +2 -0

未找到文件。
--- a/models/rank/criteo_reader.py
+++ b/models/rank/criteo_reader.py
-#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import print_function
-
-from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
-
-
-class TrainReader(Reader):
-    def init(self):
-        self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
-        self.cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
-        self.cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
-        self.hash_dim_ = envs.get_global_env("hyper_parameters.sparse_feature_number", None, "train.model")
-        self.continuous_range_ = range(1, 14)
-        self.categorical_range_ = range(14, 40)
-
-    def generate_sample(self, line):
-        """
-        Read the data line by line and process it as a dictionary
-        """
-
-        def reader():
-            """
-            This function needs to be implemented by the user, based on data format
-            """
-            features = line.rstrip('\n').split('\t')
-
-            dense_feature = []
-            sparse_feature = []
-            for idx in self.continuous_range_:
-                if features[idx] == "":
-                    dense_feature.append(0.0)
-                else:
-                    dense_feature.append(
-                        (float(features[idx]) - self.cont_min_[idx - 1]) /
-                        self.cont_diff_[idx - 1])
-
-            for idx in self.categorical_range_:
-                sparse_feature.append(
-                    [hash(str(idx) + features[idx]) % self.hash_dim_])
-            label = [int(features[0])]
-            feature_name = ["D"]
-            for idx in self.categorical_range_:
-                feature_name.append("S" + str(idx - 13))
-            feature_name.append("label")
-            yield zip(feature_name, [dense_feature] + sparse_feature + [label])
-
-        return reader
--- a/models/rank/readme.md
+++ b/models/rank/readme.md
@@ -64,6 +64,8 @@
 sh run.sh
 ```

+数据读取默认使用core/reader.py
+
 ### 训练
 ```
 python -m paddlerec.run -m paddlerec.models.rank.dnn # 以DNN为例