提交 7d68c021 编写于 作者: M malin10

fix dssm

上级 eabfd85d
...@@ -11,44 +11,61 @@ ...@@ -11,44 +11,61 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
evaluate:
reader:
batch_size: 1
class: "{workspace}/synthetic_evaluate_reader.py"
test_data_path: "{workspace}/data/train"
train:
trainer:
# for cluster training
strategy: "async"
epochs: 4 # 轮数
workspace: "paddlerec.models.match.dssm" epochs: 4
# 设备
device: cpu
# 工作目录
workspace: "paddlerec.models.match.dssm"
reader: # dataset列表
batch_size: 4 dataset:
class: "{workspace}/synthetic_reader.py" - name: dataset_train # 名字,用来区分不同的dataset
train_data_path: "{workspace}/data/train" batch_size: 4
type: QueueDataset
data_path: "{workspace}/data/train" # 数据路径
data_converter: "{workspace}/synthetic_reader.py"
#- name: dataset_infer # 名字,用来区分不同的dataset
# batch_size: 1
# type: QueueDataset
# data_path: "{workspace}/data/train" # 数据路径
# data_converter: "{workspace}/synthetic_evaluate_reader.py"
model: # 超参数
models: "{workspace}/model.py" hyper_parameters:
hyper_parameters: #优化器
TRIGRAM_D: 1000 optimizer:
NEG: 4 class: sgd
fc_sizes: [300, 300, 128] learning_rate: 0.01
fc_acts: ['tanh', 'tanh', 'tanh'] strategy: async
learning_rate: 0.01 # 用户自定义
optimizer: sgd TRIGRAM_D: 1000
NEG: 4
fc_sizes: [300, 300, 128]
fc_acts: ['tanh', 'tanh', 'tanh']
save: # executor配置
increment: epoch:
dirname: "increment" name:
epoch_interval: 2 trainer_class: single
save_last: True save_checkpoint_interval: 2 # 保存模型
save_inference_interval: 4 # 保存预测模型
save_checkpoint_path: "increment" # 保存模型路径
save_inference_path: "inference" # 保存预测模型路径
save_inference_feed_varnames: ["query", "doc_pos"] # 预测模型feed vars
save_inference_fetch_varnames: ["cos_sim_0.tmp_0"] # 预测模型 fetch vars
#init_model_path: "xxxx" # 加载模型
inference: # 执行器,每轮要跑的所有模型
dirname: "inference" executor:
epoch_interval: 4 - name: train
feed_varnames: ["query", "doc_pos"] model: "{workspace}/model.py" # 模型路径
fetch_varnames: ["cos_sim_0.tmp_0"] dataset_name: dataset_train # 名字,用来区分不同的阶段
save_last: True thread_num: 1 # 线程数
is_infer: False # 是否是infer
# - name: infer
# model: "{workspace}/model.py" # 模型路径
# dataset_name: dataset_infer # 名字,用来区分不同的阶段
# thread_num: 1 # 线程数
# is_infer: True # 是否是infer
...@@ -22,45 +22,35 @@ class Model(ModelBase): ...@@ -22,45 +22,35 @@ class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
def input(self): def _init_hyper_parameters(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self.TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D")
self._namespace) self.Neg = envs.get_global_env("hyper_parameters.NEG")
self.hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes")
Neg = envs.get_global_env("hyper_parameters.NEG", None, self.hidden_acts = envs.get_global_env("hyper_parameters.fc_acts")
self._namespace) self.learning_rate = envs.get_global_env("hyper_parameters.learning_rate")
self.query = fluid.data( def input_data(self, is_infer=False, **kwargs):
name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0) query = fluid.data(
self.doc_pos = fluid.data( name="query", shape=[-1, self.TRIGRAM_D], dtype='float32', lod_level=0)
doc_pos = fluid.data(
name="doc_pos", name="doc_pos",
shape=[-1, TRIGRAM_D], shape=[-1, self.TRIGRAM_D],
dtype='float32', dtype='float32',
lod_level=0) lod_level=0)
self.doc_negs = [
if is_infer:
return [query, doc_pos]
doc_negs = [
fluid.data( fluid.data(
name="doc_neg_" + str(i), name="doc_neg_" + str(i),
shape=[-1, TRIGRAM_D], shape=[-1, self.TRIGRAM_D],
dtype="float32", dtype="float32",
lod_level=0) for i in range(Neg) lod_level=0) for i in range(self.Neg)
] ]
self._data_var.append(self.query) return [query, doc_pos] + doc_negs
self._data_var.append(self.doc_pos)
for input in self.doc_negs:
self._data_var.append(input)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None,
self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None,
self._namespace)
def net(self, inputs, is_infer=False):
def fc(data, hidden_layers, hidden_acts, names): def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data] fc_inputs = [data]
for i in range(len(hidden_layers)): for i in range(len(hidden_layers)):
...@@ -77,71 +67,31 @@ class Model(ModelBase): ...@@ -77,71 +67,31 @@ class Model(ModelBase):
fc_inputs.append(out) fc_inputs.append(out)
return fc_inputs[-1] return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts, query_fc = fc(inputs[0], self.hidden_layers, self.hidden_acts,
['query_l1', 'query_l2', 'query_l3']) ['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, doc_pos_fc = fc(inputs[1], self.hidden_layers, self.hidden_acts,
['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3']) ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc) R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
if is_infer: if is_infer:
self._infer_results["query_doc_sim"] = R_Q_D_p
return return
R_Q_D_ns = [] R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs): for i in range(len(inputs)-2):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, [ doc_neg_fc_i = fc(inputs[i+2], self.hidden_layers, self.hidden_acts, [
'doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i),
'doc_neg_l3_' + str(i) 'doc_neg_l3_' + str(i)
]) ])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i)) R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat( concat_Rs = fluid.layers.concat(
input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1) input=[R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1) prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice( hit_prob = fluid.layers.slice(
prob, axes=[0, 1], starts=[0, 0], ends=[4, 1]) prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob)) loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss) avg_cost = fluid.layers.mean(x=loss)
self._cost = avg_cost
def infer_results(self): self._metrics["LOSS"] = avg_cost
self._infer_results['query_doc_sim'] = self.R_Q_D_p
def avg_loss(self):
self._cost = self.avg_cost
def metrics(self):
self._metrics["LOSS"] = self.avg_cost
def train_net(self):
self.input()
self.net(is_infer=False)
self.avg_loss()
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
optimizer = fluid.optimizer.SGD(learning_rate)
return optimizer
def infer_input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
self._namespace)
self.query = fluid.data(
name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(
name="doc_pos",
shape=[-1, TRIGRAM_D],
dtype='float32',
lod_level=0)
self._infer_data_var = [self.query, self.doc_pos]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def infer_net(self):
self.infer_input()
self.net(is_infer=True)
self.infer_results()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册