提交 64dbc133 编写于 作者: T tangwei

fix code style

上级 f9ef376e
......@@ -28,4 +28,3 @@ notifications:
email:
on_success: change
on_failure: always
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -11,9 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
import abc
import os
......@@ -64,7 +64,11 @@ class SlotReader(dg.MultiSlotDataGenerator):
from operator import mul
self.sparse_slots = sparse_slots.strip().split(" ")
self.dense_slots = dense_slots.strip().split(" ")
self.dense_slots_shape = [reduce(mul, [int(j) for j in i.split(":")[1].strip("[]").split(",")]) for i in self.dense_slots]
self.dense_slots_shape = [
reduce(mul,
[int(j) for j in i.split(":")[1].strip("[]").split(",")])
for i in self.dense_slots
]
self.dense_slots = [i.split(":")[0] for i in self.dense_slots]
self.slots = self.dense_slots + self.sparse_slots
self.slot2index = {}
......@@ -93,10 +97,13 @@ class SlotReader(dg.MultiSlotDataGenerator):
slot = i
if not self.visit[slot]:
if i in self.dense_slots:
output[self.slot2index[i]][1].extend([self.padding] * self.dense_slots_shape[self.slot2index[i]])
output[self.slot2index[i]][1].extend(
[self.padding] *
self.dense_slots_shape[self.slot2index[i]])
else:
output[self.slot2index[i]][1].extend([self.padding])
else:
self.visit[slot] = False
yield output
return reader
......@@ -18,7 +18,7 @@ from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
def is_afs_path(path):
"""R
"""is_afs_path
"""
if path.startswith("afs") or path.startswith("hdfs"):
return True
......@@ -133,8 +133,9 @@ class FileHandler(object):
if mode.find('a') >= 0:
org_content = self._hdfs_client.cat(dest_path)
content = content + org_content
self._local_fs_client.write(content, temp_local_file,
mode) # fleet hdfs_client only support upload, so write tmp file
self._local_fs_client.write(
content, temp_local_file, mode
) # fleet hdfs_client only support upload, so write tmp file
self._hdfs_client.delete(dest_path + ".tmp")
self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
self._hdfs_client.delete(dest_path + ".bak")
......@@ -158,7 +159,8 @@ class FileHandler(object):
files = []
if is_afs_path(path):
files = self._hdfs_client.ls(path)
files = [path + '/' + self.get_file_name(fi) for fi in files] # absulte path
files = [path + '/' + self.get_file_name(fi)
for fi in files] # absulte path
else:
files = self._local_fs_client.ls(path)
files = [path + '/' + fi for fi in files] # absulte path
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
......@@ -26,8 +26,10 @@ class Model(ModelBase):
ModelBase.__init__(self, config)
self.cost = None
self.metrics = {}
self.vocab_text_size = envs.get_global_env("vocab_text_size", None, self._namespace)
self.vocab_tag_size = envs.get_global_env("vocab_tag_size", None, self._namespace)
self.vocab_text_size = envs.get_global_env("vocab_text_size", None,
self._namespace)
self.vocab_tag_size = envs.get_global_env("vocab_tag_size", None,
self._namespace)
self.emb_dim = envs.get_global_env("emb_dim", None, self._namespace)
self.hid_dim = envs.get_global_env("hid_dim", None, self._namespace)
self.win_size = envs.get_global_env("win_size", None, self._namespace)
......@@ -35,8 +37,9 @@ class Model(ModelBase):
self.neg_size = envs.get_global_env("neg_size", None, self._namespace)
def train_net(self):
""" network definition """
text = fluid.data(name="text", shape=[None, 1], lod_level=1, dtype='int64')
""" network"""
text = fluid.data(
name="text", shape=[None, 1], lod_level=1, dtype='int64')
pos_tag = fluid.data(
name="pos_tag", shape=[None, 1], lod_level=1, dtype='int64')
neg_tag = fluid.data(
......@@ -45,13 +48,19 @@ class Model(ModelBase):
self._data_var = [text, pos_tag, neg_tag]
text_emb = fluid.embedding(
input=text, size=[self.vocab_text_size, self.emb_dim], param_attr="text_emb")
input=text,
size=[self.vocab_text_size, self.emb_dim],
param_attr="text_emb")
text_emb = fluid.layers.squeeze(input=text_emb, axes=[1])
pos_tag_emb = fluid.embedding(
input=pos_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
input=pos_tag,
size=[self.vocab_tag_size, self.emb_dim],
param_attr="tag_emb")
pos_tag_emb = fluid.layers.squeeze(input=pos_tag_emb, axes=[1])
neg_tag_emb = fluid.embedding(
input=neg_tag, size=[self.vocab_tag_size, self.emb_dim], param_attr="tag_emb")
input=neg_tag,
size=[self.vocab_tag_size, self.emb_dim],
param_attr="tag_emb")
neg_tag_emb = fluid.layers.squeeze(input=neg_tag_emb, axes=[1])
conv_1d = fluid.nets.sequence_conv_pool(
......@@ -65,7 +74,8 @@ class Model(ModelBase):
size=self.emb_dim,
param_attr="text_hid")
cos_pos = nn.cos_sim(pos_tag_emb, text_hid)
mul_text_hid = fluid.layers.sequence_expand_as(x=text_hid, y=neg_tag_emb)
mul_text_hid = fluid.layers.sequence_expand_as(
x=text_hid, y=neg_tag_emb)
mul_cos_neg = nn.cos_sim(neg_tag_emb, mul_text_hid)
cos_neg_all = fluid.layers.sequence_reshape(
input=mul_cos_neg, new_dim=self.neg_size)
......@@ -74,7 +84,10 @@ class Model(ModelBase):
#calculate hinge loss
loss_part1 = nn.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos, shape=[-1, 1], value=self.margin, dtype='float32'),
input=cos_pos,
shape=[-1, 1],
value=self.margin,
dtype='float32'),
cos_pos)
loss_part2 = nn.elementwise_add(loss_part1, cos_neg)
loss_part3 = nn.elementwise_max(
......@@ -96,7 +109,8 @@ class Model(ModelBase):
return self.metrics
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
learning_rate = envs.get_global_env("hyper_parameters.base_lr", None,
self._namespace)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
return sgd_optimizer
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import numpy as np
......
......@@ -23,13 +23,26 @@ class Model(ModelBase):
ModelBase.__init__(self, config)
def input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None, self._namespace)
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i
in range(Neg)]
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None,
self._namespace)
self.query = fluid.data(
name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(
name="doc_pos",
shape=[-1, TRIGRAM_D],
dtype='float32',
lod_level=0)
self.doc_negs = [
fluid.data(
name="doc_neg_" + str(i),
shape=[-1, TRIGRAM_D],
dtype="float32",
lod_level=0) for i in range(Neg)
]
self._data_var.append(self.query)
self._data_var.append(self.doc_pos)
for input in self.doc_negs:
......@@ -37,16 +50,24 @@ class Model(ModelBase):
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace)
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None,
self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None,
self._namespace)
def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data]
for i in range(len(hidden_layers)):
xavier = fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
xavier = fluid.initializer.Xavier(
uniform=True,
fan_in=fc_inputs[-1].shape[1],
fan_out=hidden_layers[i])
out = fluid.layers.fc(input=fc_inputs[-1],
size=hidden_layers[i],
act=hidden_acts[i],
......@@ -56,8 +77,10 @@ class Model(ModelBase):
fc_inputs.append(out)
return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts, ['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
query_fc = fc(self.query, hidden_layers, hidden_acts,
['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts,
['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
if is_infer:
......@@ -65,13 +88,17 @@ class Model(ModelBase):
R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts,
['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, [
'doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i),
'doc_neg_l3_' + str(i)
])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
concat_Rs = fluid.layers.concat(
input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
hit_prob = fluid.layers.slice(
prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss)
......@@ -91,18 +118,28 @@ class Model(ModelBase):
self.metrics()
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
None, self._namespace)
optimizer = fluid.optimizer.SGD(learning_rate)
return optimizer
def infer_input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None,
self._namespace)
self.query = fluid.data(
name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(
name="doc_pos",
shape=[-1, TRIGRAM_D],
dtype='float32',
lod_level=0)
self._infer_data_var = [self.query, self.doc_pos]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._infer_data_var,
capacity=64,
use_double_buffer=False,
iterable=False)
def infer_net(self):
self.infer_input()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册