diff --git a/core/trainers/transpiler_trainer.py b/core/trainers/transpiler_trainer.py
index c5c4513572bc59c2f30ee8d599743b9aa1011930..f9d77f4735e5264a021afce790f0ec096f72af2e 100755
--- a/core/trainers/transpiler_trainer.py
+++ b/core/trainers/transpiler_trainer.py
@@ -52,6 +52,16 @@ class TranspileTrainer(Trainer):
         reader = dataloader_instance.dataloader(
             reader_class, state, self._config_yaml)
 
+        debug_mode = envs.get_global_env("debug_mode", False, namespace)
+        if debug_mode:
+            print("--- DataLoader Debug Mode Begin, show first 10 samples ---")
+            for idx, line in enumerate(reader):
+                print(line)
+                if idx >= 9:
+                    break
+            print("--- DataLoader Debug Mode End, show first 10 samples ---")
+            exit(0)
+
         reader_class = envs.lazy_instance_by_fliename(reader_class, class_name)
         reader_ins = reader_class(self._config_yaml)
         if hasattr(reader_ins, 'generate_batch_from_trainfiles'):
@@ -98,6 +108,16 @@ class TranspileTrainer(Trainer):
         ]
         dataset.set_filelist(file_list)
+
+        debug_mode = envs.get_global_env("debug_mode", False, namespace)
+        if debug_mode:
+            print(
+                "--- Dataset Debug Mode Begin, show first 10 lines of {} ---".format(file_list[0]))
+            os.system("cat {} | {} | head -10".format(file_list[0], pipe_cmd))
+            print(
+                "--- Dataset Debug Mode End, show first 10 lines of {} ---".format(file_list[0]))
+            exit(0)
+
         return dataset
 
     def save(self, epoch_id, namespace, is_fleet=False):
@@ -116,23 +136,28 @@ class TranspileTrainer(Trainer):
             if not need_save(epoch_id, save_interval, False):
                 return
-
+
             # print("save inference model is not supported now.")
             # return
-            feed_varnames = envs.get_global_env("save.inference.feed_varnames", None, namespace)
-            fetch_varnames = envs.get_global_env("save.inference.fetch_varnames", None, namespace)
+            feed_varnames = envs.get_global_env(
+                "save.inference.feed_varnames", None, namespace)
+            fetch_varnames = envs.get_global_env(
+                "save.inference.fetch_varnames", None, namespace)
             if feed_varnames is None or fetch_varnames is None:
                 return
-            fetch_vars = [fluid.default_main_program().global_block().vars[varname] for varname in fetch_varnames]
-            dirname = envs.get_global_env("save.inference.dirname", None, namespace)
+            fetch_vars = [fluid.default_main_program().global_block().vars[varname]
+                          for varname in fetch_varnames]
+            dirname = envs.get_global_env(
+                "save.inference.dirname", None, namespace)
             assert dirname is not None
             dirname = os.path.join(dirname, str(epoch_id))
             if is_fleet:
-                fleet.save_inference_model(self._exe, dirname, feed_varnames, fetch_vars)
+                fleet.save_inference_model(
+                    self._exe, dirname, feed_varnames, fetch_vars)
             else:
                 fluid.io.save_inference_model(
                     dirname, feed_varnames, fetch_vars, self._exe)
diff --git a/models/rank/dnn/config.yaml b/models/rank/dnn/config.yaml
index ba2a5ac307c98193923391b22bb48d3a0aabc00f..27eb639190cf98ffe275d0dd49514346ceae11b0 100755
--- a/models/rank/dnn/config.yaml
+++ b/models/rank/dnn/config.yaml
@@ -24,6 +24,7 @@ train:
     batch_size: 2
     class: "{workspace}/../criteo_reader.py"
     train_data_path: "{workspace}/data/train"
+    debug_mode: False
 
   model:
     models: "{workspace}/model.py"
diff --git a/readme.md b/readme.md
index a2cdf9dcfd069e5dd8d6c5f05c466fcc7ed73cc5..3f592b404b6106489ca5bc9fff036944afd784b1 100644
--- a/readme.md
+++ b/readme.md
@@ -21,10 +21,10 @@

-- PaddleRec is a one-stop, out-of-the-box toolkit for search and recommendation models from the PaddlePaddle ecosystem. Whether you are a beginner, a developer, or a researcher, you can use PaddleRec to conveniently cover the whole workflow from research and training to inference deployment.
-- PaddleRec provides end-to-end solutions for semantic understanding, recall, coarse ranking, fine ranking, and multi-task learning in search and recommendation tasks; every included model has been validated in real business scenarios at Baidu.
-- PaddleRec standardizes each model together with its training and inference workflow and wraps it for ease of use, so users only need to customize a yaml file to get started quickly.
-- PaddleRec is built around the PaddlePaddle deep learning framework and integrates the large-scale distributed training framework Fleet and the one-click inference deployment framework PaddleServing, supporting industrial applications of recommendation and search algorithms.
+- A **one-stop, out-of-the-box toolkit** for `search and recommendation models` from the PaddlePaddle ecosystem
+- A full-workflow solution, from research and training to inference deployment, for beginners, developers, and researchers
+- An algorithm library for recommendation and search covering semantic understanding, recall, coarse ranking, fine ranking, multi-task learning, fusion, and more
+- Customize a `yaml` file to quickly get started with single-machine training, large-scale distributed training, offline inference, and online deployment

 PaddleRec Overview

@@ -37,8 +37,8 @@

 Easy Installation

 ### Requirements
-* Python >= 2.7
-* PaddlePaddle >= 1.7.2
+* Python 2.7 / 3.5 / 3.6 / 3.7
+* PaddlePaddle >= 1.7.2
 * Operating system: Windows / Mac / Linux
 
 ### Installation Command
@@ -101,24 +101,24 @@
 python -m fleetrec.run -m fleetrec.models.rank.dnn -d cpu -e cluster
 
 > Some table cells are placeholders pending update (large-scale sparse)
 
-| Category | Model | Single-machine CPU | Single-machine GPU | Distributed CPU | Large-scale sparse | Distributed GPU | Custom dataset |
-| :------: | :---: | :----------------: | :----------------: | :-------------: | :----------------: | :-------------: | :------------: |
-| Content understanding | [Text-Classification](models/contentunderstanding/text_classification/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Content understanding | [TagSpace](models/contentunderstanding/tagspace/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Recall | [Word2Vec](models/recall/word2vec/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Recall | [TDM](models/recall/tdm/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Recall | [SSR](models/recall/ssr/model.py) | ✓ | ✓ | ✓ | x | ✓ | ✓ |
-| Recall | [Gru4Rec](models/recall/gru4rec/model.py) | ✓ | ✓ | ✓ | x | ✓ | ✓ |
-| Rank | [CTR-Dnn](models/rank/dnn/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Rank | [DeepFm](models/rank/deepfm/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Rank | [xDeepFm](models/rank/xdeepfm/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Rank | [DIN](models/rank/din/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Rank | [Wide&Deep](models/rank/wide_deep/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Multi-task | [ESMM](models/multitask/essm/model.py) | ✓ | ✓ | ✓ | x | ✓ | ✓ |
-| Multi-task | [MMOE](models/multitask/mmoe/model.py) | ✓ | ✓ | ✓ | x | ✓ | ✓ |
-| Rank | [ShareBottom](models/multitask/share-bottom/model.py) | ✓ | ✓ | ✓ | x | ✓ | ✓ |
-| Match | [DSSM](models/match/dssm/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
-| Match | [Simnet](models/match/multiview-simnet/model.py) | ✓ | x | ✓ | x | ✓ | ✓ |
+| Category | Model | Single-machine CPU | Single-machine GPU | Distributed CPU | Distributed GPU |
+| :------: | :---: | :----------------: | :----------------: | :-------------: | :-------------: |
+| Content understanding | [Text-Classification](models/contentunderstanding/text_classification/model.py) | ✓ | x | ✓ | x |
+| Content understanding | [TagSpace](models/contentunderstanding/tagspace/model.py) | ✓ | x | ✓ | x |
+| Recall | [Word2Vec](models/recall/word2vec/model.py) | ✓ | x | ✓ | x |
+| Recall | [TDM](models/recall/tdm/model.py) | ✓ | x | ✓ | x |
+| Recall | [SSR](models/recall/ssr/model.py) | ✓ | ✓ | ✓ | x |
+| Recall | [Gru4Rec](models/recall/gru4rec/model.py) | ✓ | ✓ | ✓ | x |
+| Rank | [CTR-Dnn](models/rank/dnn/model.py) | ✓ | x | ✓ | x |
+| Rank | [DeepFm](models/rank/deepfm/model.py) | ✓ | x | ✓ | x |
+| Rank | [xDeepFm](models/rank/xdeepfm/model.py) | ✓ | x | ✓ | x |
+| Rank | [DIN](models/rank/din/model.py) | ✓ | x | ✓ | x |
+| Rank | [Wide&Deep](models/rank/wide_deep/model.py) | ✓ | x | ✓ | x |
+| Multi-task | [ESMM](models/multitask/essm/model.py) | ✓ | ✓ | ✓ | x |
+| Multi-task | [MMOE](models/multitask/mmoe/model.py) | ✓ | ✓ | ✓ | x |
+| Rank | [ShareBottom](models/multitask/share-bottom/model.py) | ✓ | ✓ | ✓ | x |
+| Match | [DSSM](models/match/dssm/model.py) | ✓ | x | ✓ | x |
+| Match | [Simnet](models/match/multiview-simnet/model.py) | ✓ | x | ✓ | x |
diff --git a/run.py b/run.py
index 5552682b9733520f52a3c910af1c2f85b6780266..22d16a2a40710f677d91296c21a1a71dbbd8ae6a 100755
--- a/run.py
+++ b/run.py
@@ -152,7 +152,8 @@ def cluster_engine(args):
     cluster_envs["train.trainer.engine"] = "cluster"
     cluster_envs["train.trainer.device"] = args.device
     cluster_envs["train.trainer.platform"] = envs.get_platform()
-    print("launch {} engine with cluster to with model: {}".format(trainer, args.model))
+    print("launch {} engine with cluster to with model: {}".format(
+        trainer, args.model))
 
     set_runtime_envs(cluster_envs, args.model)
     trainer = TrainerFactory.create(args.model)
@@ -245,9 +246,11 @@ if __name__ == "__main__":
                         choices=["single", "local_cluster", "cluster",
                                  "tdm_single", "tdm_local_cluster", "tdm_cluster"])
 
-    parser.add_argument("-d", "--device", type=str, choices=["cpu", "gpu"], default="cpu")
+    parser.add_argument("-d", "--device", type=str,
+                        choices=["cpu", "gpu"], default="cpu")
     parser.add_argument("-b", "--backend", type=str, default=None)
-    parser.add_argument("-r", "--role", type=str, choices=["master", "worker"], default="master")
+    parser.add_argument("-r", "--role", type=str,
+                        choices=["master", "worker"], default="master")
 
     abs_dir = os.path.dirname(os.path.abspath(__file__))
     envs.set_runtime_environs({"PACKAGE_BASE": abs_dir})
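
The config.yaml hunk above exposes the new data-preview switch as a plain reader option, so a quick sanity check of the input pipeline is a one-line config change. Below is a minimal sketch, assuming the `reader:` block layout implied by that hunk and the `fleetrec.run` entry point shown in the readme; everything other than flipping `debug_mode` is taken from the patch, and the run command is illustrative only.

```yaml
# models/rank/dnn/config.yaml (excerpt) -- only debug_mode is toggled here;
# the surrounding reader block is assumed from the config.yaml hunk above.
train:
  reader:
    batch_size: 2
    class: "{workspace}/../criteo_reader.py"
    train_data_path: "{workspace}/data/train"
    debug_mode: True  # print the first 10 DataLoader samples, or head -10 of the first data file, then exit(0)
```

With the flag on, launching training as usual (for example `python -m fleetrec.run -m fleetrec.models.rank.dnn -d cpu -e single`, where `single` is one of the `--engine` choices listed in run.py) prints the preview from the DataLoader or Dataset debug branch in transpiler_trainer.py and calls `exit(0)` before any training starts; set it back to `False` for a normal run.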