From ded2280772c0eca26ba81a147ab3af54d4d5f752 Mon Sep 17 00:00:00 2001 From: tangwei Date: Mon, 18 May 2020 10:46:38 +0800 Subject: [PATCH] rebase user --- .../classification/config.yaml | 2 +- .../classification/model.py | 22 +++++++++++-------- models/contentunderstanding/readme.md | 11 +++++++++- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/models/contentunderstanding/classification/config.yaml b/models/contentunderstanding/classification/config.yaml index 9bdfffe1..4cf4d1bb 100644 --- a/models/contentunderstanding/classification/config.yaml +++ b/models/contentunderstanding/classification/config.yaml @@ -18,7 +18,7 @@ train: strategy: "async" epochs: 10 - workspace: "paddlerec.models.rank.text_classification" + workspace: "paddlerec.models.contentunderstanding.classification" reader: batch_size: 5 diff --git a/models/contentunderstanding/classification/model.py b/models/contentunderstanding/classification/model.py index 833e9445..6254199c 100644 --- a/models/contentunderstanding/classification/model.py +++ b/models/contentunderstanding/classification/model.py @@ -26,29 +26,35 @@ import paddle.fluid.layers.control_flow as cf class Model(ModelBase): def __init__(self, config): ModelBase.__init__(self, config) + self.dict_dim = 100 + self.max_len = 10 + self.cnn_dim = 32 + self.cnn_filter_size = 128 + self.emb_dim = 8 + self.hid_dim = 128 + self.class_dim = 2 def train_net(self): """ network definition """ - data = fluid.data(name="input", shape=[None, max_len], dtype='int64') + data = fluid.data(name="input", shape=[None, self.max_len], dtype='int64') label = fluid.data(name="label", shape=[None, 1], dtype='int64') seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64') # embedding layer - emb = fluid.embedding(input=data, size=[dict_dim, emb_dim]) - emb = fluid.layers.sequence_unpad(emb, length=seq_len) + emb = fluid.embedding(input=data, size=[self.dict_dim, self.emb_dim]) + emb = fluid.layers.sequence_unpad(emb, length=seq_len) 
# convolution layer conv = fluid.nets.sequence_conv_pool( input=emb, - num_filters=cnn_dim, - filter_size=cnn_filter_size, + num_filters=self.cnn_dim, + filter_size=self.cnn_filter_size, act="tanh", pool_type="max") # full connect layer - fc_1 = fluid.layers.fc(input=[conv], size=hid_dim) + fc_1 = fluid.layers.fc(input=[conv], size=self.hid_dim) # softmax layer - prediction = fluid.layers.fc(input=[fc_1], size=class_dim, act="softmax") - + prediction = fluid.layers.fc(input=[fc_1], size=self.class_dim, act="softmax") cost = fluid.layers.cross_entropy(input=prediction, label=label) avg_cost = fluid.layers.mean(x=cost) acc = fluid.layers.accuracy(input=prediction, label=label) @@ -65,9 +71,7 @@ class Model(ModelBase): def optimizer(self): learning_rate = 0.01 sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate) - return sgd_optimizer - def infer_net(self, parameter_list): self.train_net() diff --git a/models/contentunderstanding/readme.md b/models/contentunderstanding/readme.md index ef3fd883..417fdde3 100644 --- a/models/contentunderstanding/readme.md +++ b/models/contentunderstanding/readme.md @@ -69,14 +69,23 @@ python text2paddle.py raw_big_train_data/ raw_big_test_data/ train_big_data test 无 ### 训练 + +``` +python -m paddlerec.run -m paddlerec.models.contentunderstanding.classification -d cpu -e single +``` + ### 预测 +``` +python -m paddlerec.run -m paddlerec.models.contentunderstanding.classification -d cpu -e single +``` + ## 效果对比 ### 模型效果 (测试) | 数据集 | 模型 | loss | auc | acc | mae | | :------------------: | :--------------------: | :---------: |:---------: | :---------: |:---------: | -| -- | TagSpace | -- | -- | -- | -- | +| ag news dataset | TagSpace | -- | -- | -- | -- | | -- | Classification | -- | -- | -- | -- | -- GitLab