diff --git a/models/contentunderstanding/classification/config.yaml b/models/contentunderstanding/classification/config.yaml
index 4cf4d1bb7dae64865e2a2738e80672ac23934702..d1748137f0c4d994b3a566debf43dbdc2c3d66dc 100644
--- a/models/contentunderstanding/classification/config.yaml
+++ b/models/contentunderstanding/classification/config.yaml
@@ -18,7 +18,7 @@ train:
     strategy: "async"
 
   epochs: 10
-  workspace: "paddlerec.models.contentunderstandin.classification"
+  workspace: "paddlerec.models.contentunderstanding.classification"
 
   reader:
     batch_size: 5
diff --git a/models/contentunderstanding/classification/model.py b/models/contentunderstanding/classification/model.py
index 6254199c0fd4ceec48ba7f7d8bee3382d967fd02..e4630820c868af8334fc8edfd2b6c1f4d9e77503 100644
--- a/models/contentunderstanding/classification/model.py
+++ b/models/contentunderstanding/classification/model.py
@@ -40,9 +40,12 @@ class Model(ModelBase):
         data = fluid.data(name="input", shape=[None, self.max_len], dtype='int64')
         label = fluid.data(name="label", shape=[None, 1], dtype='int64')
         seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
+
+        self._data_var = [data, label, seq_len]
+
         # embedding layer
         emb = fluid.embedding(input=data, size=[self.dict_dim, self.emb_dim])
-        emb = fluid.layers.sequence_unpad(emb, length=self.seq_len)
+        emb = fluid.layers.sequence_unpad(emb, length=seq_len)
         # convolution layer
         conv = fluid.nets.sequence_conv_pool(
             input=emb,
@@ -52,7 +55,7 @@ class Model(ModelBase):
             pool_type="max")
 
         # full connect layer
-        fc_1 = fluid.layers.fc(input=[conv], size=hid_dim)
+        fc_1 = fluid.layers.fc(input=[conv], size=self.hid_dim)
         # softmax layer
         prediction = fluid.layers.fc(input=[fc_1], size=self.class_dim, act="softmax")
         cost = fluid.layers.cross_entropy(input=prediction, label=label)
@@ -60,18 +63,18 @@ class Model(ModelBase):
         acc = fluid.layers.accuracy(input=prediction, label=label)
         self.cost = avg_cost
-        self.metrics["acc"] = cos_pos
+        self._metrics["acc"] = acc
 
     def get_cost_op(self):
         return self.cost
 
     def get_metrics(self):
-        return self.metrics
+        return self._metrics
 
     def optimizer(self):
         learning_rate = 0.01
         sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
         return sgd_optimizer
 
-    def infer_net(self, parameter_list):
+    def infer_net(self):
         self.train_net()
diff --git a/models/contentunderstanding/classification/reader.py b/models/contentunderstanding/classification/reader.py
index 9a93211ed6997c342c412b7c2a043f89838332da..f90097d702df461d226443c32570e46ea3a0b093 100644
--- a/models/contentunderstanding/classification/reader.py
+++ b/models/contentunderstanding/classification/reader.py
@@ -44,5 +44,9 @@ class TrainReader(Reader):
             if data is None:
                 yield None
                 return
+            data = [int(i) for i in data]
+            label = [int(i) for i in label]
+            seq_len = [int(i) for i in seq_len]
+            print >>sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
             yield [('data', data), ('label', label), ('seq_len', seq_len)]
         return data_iter
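To make the model.py and reader.py changes above concrete: the reader now yields `('data', ...)`, `('label', ...)`, and `('seq_len', ...)` slots that line up with `self._data_var = [data, label, seq_len]`, and `sequence_unpad` consumes the local `seq_len` tensor created by `fluid.data` rather than `self.seq_len`. Below is a minimal standalone sketch of that unpad step, assuming the PaddlePaddle 1.x fluid API used in this repo; the toy dimensions, variable names, and batch contents are illustrative only, not taken from config.yaml.

```python
# Minimal sketch of the sequence_unpad step (assumes PaddlePaddle 1.x fluid).
import numpy as np
import paddle.fluid as fluid

max_len, dict_dim, emb_dim = 10, 100, 8  # toy sizes, not the repo's config values

data = fluid.data(name="input", shape=[None, max_len], dtype="int64")
seq_len = fluid.data(name="seq_len", shape=[None], dtype="int64")

emb = fluid.embedding(input=data, size=[dict_dim, emb_dim])
# Drops the padded positions of each row according to its true length and
# returns a LoDTensor that sequence_conv_pool can consume downstream.
unpadded = fluid.layers.sequence_unpad(emb, length=seq_len)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

batch = np.zeros((2, max_len), dtype="int64")   # two padded samples
lengths = np.array([3, 5], dtype="int64")       # their real lengths
out, = exe.run(feed={"input": batch, "seq_len": lengths},
               fetch_list=[unpadded],
               return_numpy=False)
print(np.array(out).shape)  # (3 + 5, emb_dim): only the real tokens remain
```

One side note on the reader.py hunk: the added debugging line uses the Python 2 `print >>sys.stderr` statement, which will not parse under Python 3.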
diff --git a/models/contentunderstanding/readme.md b/models/contentunderstanding/readme.md
index 417fdde3a8c31a184db7d7dd0f372f769c9615c1..06be7106b287149d24b56773689dad08708a064f 100644
--- a/models/contentunderstanding/readme.md
+++ b/models/contentunderstanding/readme.md
@@ -71,13 +71,13 @@
 python text2paddle.py raw_big_train_data/ raw_big_test_data/ train_big_data test
 
 ### Training
 ```
-python -m paddlerec.run -m paddlerec.models.rank.dnn -d cpu -e single
+python -m paddlerec.run -m paddlerec.models.contentunderstanding.classification -d cpu -e single
 ```
 
 ### Prediction
 ```
-python -m paddlerec.run -m paddlerec.models.rank.dnn -d cpu -e single
+python -m paddlerec.run -m paddlerec.models.contentunderstanding.classification -d cpu -e single
 ```
 
 ## Performance comparison
diff --git a/models/contentunderstanding/tagspace/config.yaml b/models/contentunderstanding/tagspace/config.yaml
index e0232f3faf221be04a6f95a6b0d011d612d0e35c..70333fcbf7edf4b6b5f54145e29cb122ed3ae9c6 100644
--- a/models/contentunderstanding/tagspace/config.yaml
+++ b/models/contentunderstanding/tagspace/config.yaml
@@ -18,7 +18,7 @@ train:
     strategy: "async"
 
   epochs: 10
-  workspace: "paddlerec.models.rank.tagspace"
+  workspace: "paddlerec.models.contentunderstanding.tagspace"
 
   reader:
     batch_size: 5
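Both `workspace` fixes (here and in the classification config above) point the value at the same dotted path that the readme now passes to `python -m paddlerec.run -m ...`. As a quick, hypothetical sanity check, not part of PaddleRec and assuming the workspace resolves to an importable Python package in an environment where paddlerec is installed, the corrected strings can simply be imported:

```python
# Hypothetical check, not part of PaddleRec: if the workspace value is an importable
# dotted package path, a typo such as "contentunderstandin" or a stale
# "paddlerec.models.rank.*" prefix shows up immediately as an ImportError.
import importlib

for workspace in (
    "paddlerec.models.contentunderstanding.classification",
    "paddlerec.models.contentunderstanding.tagspace",
):
    importlib.import_module(workspace)
    print("resolvable:", workspace)
```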
diff --git a/readme.md b/readme.md
index 3dfbf8d3904d47092052b5d358dd5aeb4ce67b8a..f90176c5de9e90207209ed30a8b9cc53e4a50c4c 100644
--- a/readme.md
+++ b/readme.md
@@ -1,2 +1,171 @@
-# PaddleRec
-Recommendation algorithms with support for large-scale parallel training
+
+- A **one-stop, out-of-the-box toolkit** for search and recommendation models, built on the PaddlePaddle ecosystem
+- A full-pipeline solution for beginners, developers, and researchers, covering everything from research and training to prediction and deployment
+- A recommendation and search algorithm library covering semantic understanding, recall, pre-ranking, ranking, multi-task learning, fusion, and other tasks
+- Customize options in a **yaml** config to quickly get started with single-machine training, large-scale distributed training, offline prediction, and online deployment
+