From 05b60ccc58f9df6adbd17d545cd7d51f331dda54 Mon Sep 17 00:00:00 2001
From: xujiaqi01
Date: Sat, 9 May 2020 19:14:35 +0800
Subject: [PATCH] add text_classification

---
 models/rank/text_classification/config.yaml | 40 ++++++++++++++
 models/rank/text_classification/model.py    | 90 +++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)
 create mode 100644 models/rank/text_classification/config.yaml
 create mode 100644 models/rank/text_classification/model.py

diff --git a/models/rank/text_classification/config.yaml b/models/rank/text_classification/config.yaml
new file mode 100644
index 00000000..2104a613
--- /dev/null
+++ b/models/rank/text_classification/config.yaml
@@ -0,0 +1,40 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+train:
+  trainer:
+    # for cluster training
+    strategy: "async"
+
+  epochs: 10
+  workspace: "fleetrec.models.rank.text_classification"
+
+  reader:
+    batch_size: 5
+    class: "{workspace}/reader.py"
+    train_data_path: "{workspace}/train_data"
+
+  model:
+    models: "{workspace}/model.py"
+
+  save:
+    increment:
+      dirname: "increment"
+      epoch_interval: 1
+      save_last: True
+    inference:
+      dirname: "inference"
+      epoch_interval: 100
+      save_last: True
+
diff --git a/models/rank/text_classification/model.py b/models/rank/text_classification/model.py
new file mode 100644
index 00000000..0218ab8c
--- /dev/null
+++ b/models/rank/text_classification/model.py
@@ -0,0 +1,90 @@
+import paddle.fluid as fluid
+import math
+
+from fleetrec.core.utils import envs
+from fleetrec.core.model import Model as ModelBase
+
+import paddle.fluid as fluid
+import paddle.fluid.layers.nn as nn
+import paddle.fluid.layers.tensor as tensor
+import paddle.fluid.layers.control_flow as cf
+
+
+class Model(ModelBase):
+    """Text classification model: convolution over token embeddings.
+
+    Embeds padded token ids, removes the padding, applies a sequence
+    convolution with max pooling, and classifies through two fully
+    connected layers, the last one with a softmax activation.
+    """
+
+    def __init__(self, config):
+        ModelBase.__init__(self, config)
+        # Network hyper-parameters read by train_net().
+        # TODO(review): placeholder values -- confirm them against
+        # reader.py and the training data (max_len is the padded input
+        # length, dict_dim the vocabulary size).
+        self.dict_dim = 100
+        self.max_len = 10
+        self.emb_dim = 8
+        self.cnn_dim = 32
+        self.cnn_filter_size = 128
+        self.hid_dim = 128
+        self.class_dim = 2
+        # train_net() and get_metrics() use self.metrics; create it
+        # here unless the base class already did.
+        if not hasattr(self, "metrics"):
+            self.metrics = {}
+
+    def train_net(self):
+        """Build the training network.
+
+        Stores the mean cross-entropy loss in ``self.cost`` and the
+        accuracy in ``self.metrics["acc"]``.
+        """
+        data = fluid.data(
+            name="input", shape=[None, self.max_len], dtype='int64')
+        label = fluid.data(name="label", shape=[None, 1], dtype='int64')
+        seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
+        # embedding layer
+        emb = fluid.embedding(input=data, size=[self.dict_dim, self.emb_dim])
+        # strip the padding using the true length of every sample
+        emb = fluid.layers.sequence_unpad(emb, length=seq_len)
+        # convolution layer
+        conv = fluid.nets.sequence_conv_pool(
+            input=emb,
+            num_filters=self.cnn_dim,
+            filter_size=self.cnn_filter_size,
+            act="tanh",
+            pool_type="max")
+
+        # full connect layer
+        fc_1 = fluid.layers.fc(input=[conv], size=self.hid_dim)
+        # softmax layer
+        prediction = fluid.layers.fc(
+            input=[fc_1], size=self.class_dim, act="softmax")
+        cost = fluid.layers.cross_entropy(input=prediction, label=label)
+        avg_cost = fluid.layers.mean(x=cost)
+        acc = fluid.layers.accuracy(input=prediction, label=label)
+
+        self.cost = avg_cost
+        self.metrics["acc"] = acc
+
+    def get_cost_op(self):
+        """Return the loss variable created by train_net()."""
+        return self.cost
+
+    def get_metrics(self):
+        """Return the dict of metric variables filled by train_net()."""
+        return self.metrics
+
+    def optimizer(self):
+        """Return an Adagrad optimizer with a fixed learning rate."""
+        # TODO: read the rate from the "hyper_parameters.base_lr" config
+        # entry via envs.get_global_env instead of hard-coding it.
+        learning_rate = 0.01
+        return fluid.optimizer.Adagrad(learning_rate=learning_rate)
+
+    def infer_net(self, parameter_list):
+        """Build the inference network; it reuses the training graph."""
+        self.train_net()
-- 
GitLab