From 7d68c021972ebb798df11f85a44bd5fd4901fdfd Mon Sep 17 00:00:00 2001 From: malin10 Date: Thu, 28 May 2020 13:45:45 +0800 Subject: [PATCH] fix dssm --- models/match/dssm/config.yaml | 89 ++++++++++++++++----------- models/match/dssm/model.py | 112 ++++++++++------------------------ 2 files changed, 84 insertions(+), 117 deletions(-) diff --git a/models/match/dssm/config.yaml b/models/match/dssm/config.yaml index 22881bdf..f8145d8f 100755 --- a/models/match/dssm/config.yaml +++ b/models/match/dssm/config.yaml @@ -11,44 +11,61 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -evaluate: - reader: - batch_size: 1 - class: "{workspace}/synthetic_evaluate_reader.py" - test_data_path: "{workspace}/data/train" - -train: - trainer: - # for cluster training - strategy: "async" - epochs: 4 - workspace: "paddlerec.models.match.dssm" +# Number of epochs +epochs: 4 +# Device +device: cpu +# Working directory +workspace: "paddlerec.models.match.dssm" - reader: - batch_size: 4 - class: "{workspace}/synthetic_reader.py" - train_data_path: "{workspace}/data/train" +# Dataset list +dataset: +- name: dataset_train # Name, used to tell datasets apart + batch_size: 4 + type: QueueDataset + data_path: "{workspace}/data/train" # Data path + data_converter: "{workspace}/synthetic_reader.py" +#- name: dataset_infer # Name, used to tell datasets apart +# batch_size: 1 +# type: QueueDataset +# data_path: "{workspace}/data/train" # Data path +# data_converter: "{workspace}/synthetic_evaluate_reader.py" - model: - models: "{workspace}/model.py" - hyper_parameters: - TRIGRAM_D: 1000 - NEG: 4 - fc_sizes: [300, 300, 128] - fc_acts: ['tanh', 'tanh', 'tanh'] - learning_rate: 0.01 - optimizer: sgd +# Hyperparameters +hyper_parameters: + # Optimizer + optimizer: + class: sgd + learning_rate: 0.01 + strategy: async + # User-defined + TRIGRAM_D: 1000 + NEG: 4 + fc_sizes: [300, 300, 128] + fc_acts: ['tanh', 'tanh', 'tanh'] - save: - increment: - dirname: "increment" - 
epoch_interval: 2 - save_last: True +# Executor configuration +epoch: + name: + trainer_class: single + save_checkpoint_interval: 2 # Save the model + save_inference_interval: 4 # Save the inference model + save_checkpoint_path: "increment" # Path for saving the model + save_inference_path: "inference" # Path for saving the inference model + save_inference_feed_varnames: ["query", "doc_pos"] # Feed vars of the inference model + save_inference_fetch_varnames: ["cos_sim_0.tmp_0"] # Fetch vars of the inference model + #init_model_path: "xxxx" # Load a model - inference: - dirname: "inference" - epoch_interval: 4 - feed_varnames: ["query", "doc_pos"] - fetch_varnames: ["cos_sim_0.tmp_0"] - save_last: True +# Executors: all models to run in each epoch +executor: + - name: train + model: "{workspace}/model.py" # Model path + dataset_name: dataset_train # Name, used to tell stages apart + thread_num: 1 # Number of threads + is_infer: False # Whether this is inference +# - name: infer +# model: "{workspace}/model.py" # Model path +# dataset_name: dataset_infer # Name, used to tell stages apart +# thread_num: 1 # Number of threads +# is_infer: True # Whether this is inference diff --git a/models/match/dssm/model.py b/models/match/dssm/model.py index 05d6f762..fed0d692 100755 --- a/models/match/dssm/model.py +++ b/models/match/dssm/model.py @@ -22,45 +22,35 @@ class Model(ModelBase): def __init__(self, config): ModelBase.__init__(self, config) - def input(self): - TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, - self._namespace) - - Neg = envs.get_global_env("hyper_parameters.NEG", None, - self._namespace) - - self.query = fluid.data( - name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0) - self.doc_pos = fluid.data( + def _init_hyper_parameters(self): + self.TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D") + self.Neg = envs.get_global_env("hyper_parameters.NEG") + self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes") + self.hidden_acts = envs.get_global_env("hyper_parameters.fc_acts") + self.learning_rate = envs.get_global_env("hyper_parameters.learning_rate") + + def input_data(self, is_infer=False, **kwargs): + query = fluid.data( + name="query", shape=[-1, self.TRIGRAM_D], 
dtype='float32', lod_level=0) + doc_pos = fluid.data( name="doc_pos", - shape=[-1, TRIGRAM_D], + shape=[-1, self.TRIGRAM_D], dtype='float32', lod_level=0) - self.doc_negs = [ + + if is_infer: + return [query, doc_pos] + + doc_negs = [ fluid.data( name="doc_neg_" + str(i), - shape=[-1, TRIGRAM_D], + shape=[-1, self.TRIGRAM_D], dtype="float32", - lod_level=0) for i in range(Neg) + lod_level=0) for i in range(self.Neg) ] - self._data_var.append(self.query) - self._data_var.append(self.doc_pos) - for input in self.doc_negs: - self._data_var.append(input) - - if self._platform != "LINUX": - self._data_loader = fluid.io.DataLoader.from_generator( - feed_list=self._data_var, - capacity=64, - use_double_buffer=False, - iterable=False) - - def net(self, is_infer=False): - hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, - self._namespace) - hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, - self._namespace) + return [query, doc_pos] + doc_negs + def net(self, inputs, is_infer=False): def fc(data, hidden_layers, hidden_acts, names): fc_inputs = [data] for i in range(len(hidden_layers)): @@ -77,71 +67,31 @@ class Model(ModelBase): fc_inputs.append(out) return fc_inputs[-1] - query_fc = fc(self.query, hidden_layers, hidden_acts, + query_fc = fc(inputs[0], self.hidden_layers, self.hidden_acts, ['query_l1', 'query_l2', 'query_l3']) - doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, + doc_pos_fc = fc(inputs[1], self.hidden_layers, self.hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3']) - self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc) + R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc) if is_infer: + self._infer_results["query_doc_sim"] = R_Q_D_p return R_Q_D_ns = [] - for i, doc_neg in enumerate(self.doc_negs): - doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, [ + for i in range(len(inputs)-2): + doc_neg_fc_i = fc(inputs[i+2], self.hidden_layers, self.hidden_acts, [ 'doc_neg_l1_' + str(i), 'doc_neg_l2_' 
+ str(i), 'doc_neg_l3_' + str(i) ]) R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i)) concat_Rs = fluid.layers.concat( - input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1) + input=[R_Q_D_p] + R_Q_D_ns, axis=-1) prob = fluid.layers.softmax(concat_Rs, axis=1) hit_prob = fluid.layers.slice( prob, axes=[0, 1], starts=[0, 0], ends=[4, 1]) loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob)) - self.avg_cost = fluid.layers.mean(x=loss) - - def infer_results(self): - self._infer_results['query_doc_sim'] = self.R_Q_D_p - - def avg_loss(self): - self._cost = self.avg_cost - - def metrics(self): - self._metrics["LOSS"] = self.avg_cost - - def train_net(self): - self.input() - self.net(is_infer=False) - self.avg_loss() - self.metrics() - - def optimizer(self): - learning_rate = envs.get_global_env("hyper_parameters.learning_rate", - None, self._namespace) - optimizer = fluid.optimizer.SGD(learning_rate) - return optimizer - - def infer_input(self): - TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, - self._namespace) - self.query = fluid.data( - name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0) - self.doc_pos = fluid.data( - name="doc_pos", - shape=[-1, TRIGRAM_D], - dtype='float32', - lod_level=0) - self._infer_data_var = [self.query, self.doc_pos] - - self._infer_data_loader = fluid.io.DataLoader.from_generator( - feed_list=self._infer_data_var, - capacity=64, - use_double_buffer=False, - iterable=False) + avg_cost = fluid.layers.mean(x=loss) + self._cost = avg_cost + self._metrics["LOSS"] = avg_cost - def infer_net(self): - self.infer_input() - self.net(is_infer=True) - self.infer_results() -- GitLab