diff --git a/models/rank/deepfm/data/get_slot_data.py b/models/rank/deepfm/data/get_slot_data.py index 6177c990d8ef0c8a1cf922dd9d50c6419cb8c1b7..c03b29248557de21ddb29c6a287045d9a7f1b500 100755 --- a/models/rank/deepfm/data/get_slot_data.py +++ b/models/rank/deepfm/data/get_slot_data.py @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -import yaml +import yaml, os + from paddlerec.core.reader import Reader from paddlerec.core.utils import envs +import paddle.fluid.incubate.data_generator as dg try: import cPickle as pickle except ImportError: @@ -44,7 +46,7 @@ class TrainReader(dg.MultiSlotDataGenerator): self.continuous_range_ = range(1, 14) self.categorical_range_ = range(14, 40) # load preprocessed feature dict - self.feat_dict_name = "aid_data/feat_dict_10.pkl2" + self.feat_dict_name = "sample_data/feat_dict_10.pkl2" self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb')) def _process_line(self, line): diff --git a/models/rank/logistic_regression/data/get_slot_data.py b/models/rank/logistic_regression/data/get_slot_data.py index 6177c990d8ef0c8a1cf922dd9d50c6419cb8c1b7..799762ce06c00147a25aa2b72fe0908e7bd2e8e3 100755 --- a/models/rank/logistic_regression/data/get_slot_data.py +++ b/models/rank/logistic_regression/data/get_slot_data.py @@ -13,8 +13,12 @@ # limitations under the License. import yaml +import os + from paddlerec.core.reader import Reader from paddlerec.core.utils import envs +import paddle.fluid.incubate.data_generator as dg + try: import cPickle as pickle except ImportError: @@ -44,7 +48,7 @@ class TrainReader(dg.MultiSlotDataGenerator): self.continuous_range_ = range(1, 14) self.categorical_range_ = range(14, 40) # load preprocessed feature dict - self.feat_dict_name = "aid_data/feat_dict_10.pkl2" + self.feat_dict_name = "sample_data/feat_dict_10.pkl2" self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb')) def _process_line(self, line): @@ -90,6 +94,7 @@ class TrainReader(dg.MultiSlotDataGenerator): return data_iter -reader = TrainReader("../config.yaml") +reader = TrainReader( + "../config.yaml") # run this file in original folder to find config.yaml reader.init() reader.run_from_stdin() diff --git a/models/rank/logistic_regression/data/preprocess.py b/models/rank/logistic_regression/data/preprocess.py index 9da3bdc3d93bfcd0dd98fddc64c870d20feddb38..0eaed5eb7e44fd396729e6b7ac442d05b39e8464 100755 --- a/models/rank/logistic_regression/data/preprocess.py +++ b/models/rank/logistic_regression/data/preprocess.py @@ -66,7 +66,6 @@ def get_feat_dict(): categorical_range_ = range(14, 40) if not os.path.exists(dir_feat_dict_): - # print('generate a feature dict') # Count the number of occurrences of discrete features feat_cnt = Counter() with open('train.txt', 'r') as fin: