diff --git a/core/model.py b/core/model.py
index 9422dc49ea9492ff958106a1a51ea11f318bd148..c5fc33fdc44cac467dd859e34aca0fdcee6c41a0 100755
--- a/core/model.py
+++ b/core/model.py
@@ -149,11 +149,11 @@ class Model(object):
         return optimizer_i
 
     def optimizer(self):
-        learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
-                                            None, self._namespace)
-        optimizer = envs.get_global_env("hyper_parameters.optimizer", None,
-                                        self._namespace)
-        return self._build_optimizer(optimizer, learning_rate)
+        opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
+        opt_lr = envs.get_global_env("hyper_parameters.optimizer.learning_rate")
+        opt_strategy = envs.get_global_env("hyper_parameters.optimizer.strategy")
+
+        return self._build_optimizer(opt_name, opt_lr, opt_strategy)
 
     def input_data(self, is_infer=False, **kwargs):
         name = "dataset." + kwargs.get("dataset_name") + "."
diff --git a/core/trainers/single_infer.py b/core/trainers/single_infer.py
index 873ff010416a4a3eecb88edb63dbb9c2adbf27da..ee41832d6e5d2d789c37969678e85ebe2b44aaa3 100755
--- a/core/trainers/single_infer.py
+++ b/core/trainers/single_infer.py
@@ -167,6 +167,7 @@ class SingleInfer(TranspileTrainer):
                     model = envs.lazy_instance_by_fliename(
                         model_path, "Model")(self._env)
                     model._infer_data_var = model.input_data(
+                        is_infer=True,
                         dataset_name=model_dict["dataset_name"])
                     if envs.get_global_env("dataset." + dataset_name +
                                            ".type") == "DataLoader":
diff --git a/core/trainers/single_trainer.py b/core/trainers/single_trainer.py
index 274047a0127d6ef2df601284cb9cd1e67335cf85..acd972ac06237f8c3ef6dec85cf0430afed18fe2 100755
--- a/core/trainers/single_trainer.py
+++ b/core/trainers/single_trainer.py
@@ -147,11 +147,6 @@ class SingleTrainer(TranspileTrainer):
             startup_program = fluid.Program()
             scope = fluid.Scope()
             dataset_name = model_dict["dataset_name"]
-            opt_name = envs.get_global_env("hyper_parameters.optimizer.class")
-            opt_lr = envs.get_global_env(
-                "hyper_parameters.optimizer.learning_rate")
-            opt_strategy = envs.get_global_env(
-                "hyper_parameters.optimizer.strategy")
             with fluid.program_guard(train_program, startup_program):
                 with fluid.unique_name.guard():
                     with fluid.scope_guard(scope):
@@ -168,8 +163,7 @@ class SingleTrainer(TranspileTrainer):
                             self._get_dataloader(dataset_name,
                                                  model._data_loader)
                         model.net(model._data_var, False)
-                    optimizer = model._build_optimizer(opt_name, opt_lr,
-                                                       opt_strategy)
+                    optimizer = model.optimizer()
                     optimizer.minimize(model._cost)
             self._model[model_dict["name"]][0] = train_program
             self._model[model_dict["name"]][1] = startup_program
diff --git a/core/utils/dataset_instance.py b/core/utils/dataset_instance.py
index 3f0a3a484dfccfbc26216cc9eb09fe8443401078..5c9b0f56eadc009b3a1c3d2ec6a0f3b3b2af4187 100755
--- a/core/utils/dataset_instance.py
+++ b/core/utils/dataset_instance.py
@@ -14,7 +14,8 @@
 
 from __future__ import print_function
 import sys
-
+import yaml
+from paddlerec.core.utils import envs
 from paddlerec.core.utils.envs import lazy_instance_by_fliename
 from paddlerec.core.reader import SlotReader
 
@@ -38,6 +39,11 @@ else:
     yaml_abs_path = sys.argv[3]
 
+with open(yaml_abs_path, 'r') as rb:
+    config = yaml.load(rb.read(), Loader=yaml.FullLoader)
+envs.set_global_envs(config)
+envs.update_workspace()
+
 if reader_name != "SlotReader":
     reader_class = lazy_instance_by_fliename(reader_package, reader_name)
     reader = reader_class(yaml_abs_path)
 
diff --git a/models/match/dssm/config.yaml b/models/match/dssm/config.yaml
index 47918eba3e9ca1c7834298db5061f900d454baa8..6ef08d9b226d3a277495771cdfbbec0422a3cc7c 100755
--- a/models/match/dssm/config.yaml
+++ b/models/match/dssm/config.yaml
@@ -53,13 +53,14 @@ runner:
   save_inference_feed_varnames: ["query", "doc_pos"] # feed vars of save inference
   save_inference_fetch_varnames: ["cos_sim_0.tmp_0"] # fetch vars of save inference
   init_model_path: "" # load model path
-  fetch_period: 10
+  fetch_period: 2
 - name: runner2
   class: single_infer
   # num of epochs
   epochs: 1
   # device to run training or infer
   device: cpu
+  fetch_period: 1
   init_model_path: "increment/2" # load model path
 
 # runner will run all the phase in each epoch
diff --git a/models/match/multiview-simnet/config.yaml b/models/match/multiview-simnet/config.yaml
index 53ac4c095c0d347cca8cba1afb9866c66ab85218..43f0b27f00eb0079a4777a3eec122e588fd7c802 100755
--- a/models/match/multiview-simnet/config.yaml
+++ b/models/match/multiview-simnet/config.yaml
@@ -11,49 +11,73 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-evaluate:
-  workspace: "paddlerec.models.match.multiview-simnet"
-  reader:
-    batch_size: 2
-    class: "{workspace}/evaluate_reader.py"
-    test_data_path: "{workspace}/data/test"
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
+# workspace
+workspace: "paddlerec.models.match.multiview-simnet"
 
-  epochs: 2
-  workspace: "paddlerec.models.match.multiview-simnet"
+# list of dataset
+dataset:
+- name: dataset_train # name of dataset to distinguish different datasets
+  batch_size: 2
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/train"
+  sparse_slots: "1 2 3"
+- name: dataset_infer # name
+  batch_size: 2
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/test"
+  sparse_slots: "1 2"
 
-  reader:
-    batch_size: 2
-    class: "{workspace}/reader.py"
-    train_data_path: "{workspace}/data/train"
-    dataset_class: "DataLoader"
+# hyper parameters of user-defined network
+hyper_parameters:
+  optimizer:
+    class: Adam
+    learning_rate: 0.0001
+    strategy: async
+  query_encoder: "bow"
+  title_encoder: "bow"
+  query_encode_dim: 128
+  title_encode_dim: 128
+  sparse_feature_dim: 1000001
+  embedding_dim: 128
+  hidden_size: 128
+  margin: 0.1
 
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      use_DataLoader: True
-      query_encoder: "bow"
-      title_encoder: "bow"
-      query_encode_dim: 128
-      title_encode_dim: 128
-      query_slots: 1
-      title_slots: 1
-      sparse_feature_dim: 1000001
-      embedding_dim: 128
-      hidden_size: 128
-      learning_rate: 0.0001
-      optimizer: adam
+# select runner by name
+mode: runner1
+# config of each runner.
+# runner is a kind of paddle training class, which wraps the train/infer process.
+runner:
+- name: runner1
+  class: single_train
+  # num of epochs
+  epochs: 2
+  # device to run training or infer
+  device: cpu
+  save_checkpoint_interval: 1 # save model interval of epochs
+  save_inference_interval: 1 # save inference interval of epochs
+  save_checkpoint_path: "increment" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  save_inference_feed_varnames: [] # feed vars of save inference
+  save_inference_fetch_varnames: [] # fetch vars of save inference
+  init_model_path: "" # load model path
+  fetch_period: 1
+- name: runner2
+  class: single_infer
+  # num of epochs
+  epochs: 1
+  # device to run training or infer
+  device: cpu
+  fetch_period: 1
+  init_model_path: "increment/0" # load model path
 
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 1
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 1
-      save_last: True
+# runner will run all the phase in each epoch
+phase:
+- name: phase1
+  model: "{workspace}/model.py" # user-defined model
+  dataset_name: dataset_train # select dataset by name
+  thread_num: 1
+#- name: phase2
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_infer # select dataset by name
+#  thread_num: 1
diff --git a/models/match/multiview-simnet/model.py b/models/match/multiview-simnet/model.py
index f80a1cd0390f3c7aafc772ef535eb36b9657b439..6eecb0bd477a64975c9d2c12e7391eb3bb2bcde4 100755
--- a/models/match/multiview-simnet/model.py
+++ b/models/match/multiview-simnet/model.py
@@ -99,146 +99,88 @@ class SimpleEncoderFactory(object):
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)
-        self.init_config()
-
-    def init_config(self):
-        self._fetch_interval = 1
-        query_encoder = envs.get_global_env("hyper_parameters.query_encoder",
-                                            None, self._namespace)
-        title_encoder = envs.get_global_env("hyper_parameters.title_encoder",
-                                            None, self._namespace)
-        query_encode_dim = envs.get_global_env(
-            "hyper_parameters.query_encode_dim", None, self._namespace)
-        title_encode_dim = envs.get_global_env(
-            "hyper_parameters.title_encode_dim", None, self._namespace)
-        query_slots = envs.get_global_env("hyper_parameters.query_slots", None,
-                                          self._namespace)
-        title_slots = envs.get_global_env("hyper_parameters.title_slots", None,
-                                          self._namespace)
-        factory = SimpleEncoderFactory()
-        self.query_encoders = [
-            factory.create(query_encoder, query_encode_dim)
-            for i in range(query_slots)
-        ]
-        self.title_encoders = [
-            factory.create(title_encoder, title_encode_dim)
-            for i in range(title_slots)
-        ]
-        self.emb_size = envs.get_global_env(
-            "hyper_parameters.sparse_feature_dim", None, self._namespace)
-        self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim",
-                                           None, self._namespace)
+    def _init_hyper_parameters(self):
+        self.query_encoder = envs.get_global_env("hyper_parameters.query_encoder")
+        self.title_encoder = envs.get_global_env("hyper_parameters.title_encoder")
+        self.query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim")
+        self.title_encode_dim = envs.get_global_env("hyper_parameters.title_encode_dim")
+
+        self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim")
+        self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim")
         self.emb_shape = [self.emb_size, self.emb_dim]
-        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size",
-                                               None, self._namespace)
-        self.margin = 0.1
-
-    def input(self, is_train=True):
-        self.q_slots = [
-            fluid.data(
-                name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
-            for i in range(len(self.query_encoders))
-        ]
-        self.pt_slots = [
-            fluid.data(
-                name="%d" % (i + len(self.query_encoders)),
-                shape=[None, 1],
-                lod_level=1,
-                dtype='int64') for i in range(len(self.title_encoders))
-        ]
-        if is_train == False:
-            return self.q_slots + self.pt_slots
+        self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size")
+        self.margin = envs.get_global_env("hyper_parameters.margin")
 
-        self.nt_slots = [
-            fluid.data(
-                name="%d" %
-                (i + len(self.query_encoders) + len(self.title_encoders)),
-                shape=[None, 1],
-                lod_level=1,
-                dtype='int64') for i in range(len(self.title_encoders))
+    def net(self, input, is_infer=False):
+        factory = SimpleEncoderFactory()
+        self.q_slots = self._sparse_data_var[0:1]
+        self.query_encoders = [
+            factory.create(self.query_encoder, self.query_encode_dim)
+            for _ in self.q_slots
         ]
-
-        return self.q_slots + self.pt_slots + self.nt_slots
-
-    def train_input(self):
-        res = self.input()
-        self._data_var = res
-
-        use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader",
-                                             False, self._namespace)
-
-        if self._platform != "LINUX" or use_dataloader:
-            self._data_loader = fluid.io.DataLoader.from_generator(
-                feed_list=self._data_var,
-                capacity=256,
-                use_double_buffer=False,
-                iterable=False)
-
-    def get_acc(self, x, y):
-        less = tensor.cast(cf.less_than(x, y), dtype='float32')
-        label_ones = fluid.layers.fill_constant_batch_size_like(
-            input=x, dtype='float32', shape=[-1, 1], value=1.0)
-        correct = fluid.layers.reduce_sum(less)
-        total = fluid.layers.reduce_sum(label_ones)
-        acc = fluid.layers.elementwise_div(correct, total)
-        return acc
-
-    def net(self):
         q_embs = [
             fluid.embedding(
                 input=query, size=self.emb_shape, param_attr="emb")
             for query in self.q_slots
         ]
-        pt_embs = [
+        # encode each embedding field with encoder
+        q_encodes = [
+            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
+        ]
+        # concat multi view for query, pos_title, neg_title
+        q_concat = fluid.layers.concat(q_encodes)
+        # projection of hidden layer
+        q_hid = fluid.layers.fc(q_concat,
+                                size=self.hidden_size,
+                                param_attr='q_fc.w',
+                                bias_attr='q_fc.b')
+
+        self.pt_slots = self._sparse_data_var[1:2]
+        self.title_encoders = [
+            factory.create(self.title_encoder, self.title_encode_dim)
+        ]
+        pt_embs = [
             fluid.embedding(
                 input=title, size=self.emb_shape, param_attr="emb")
             for title in self.pt_slots
         ]
-        nt_embs = [
+        pt_encodes = [
+            self.title_encoders[i].forward(emb)
+            for i, emb in enumerate(pt_embs)
+        ]
+        pt_concat = fluid.layers.concat(pt_encodes)
+        pt_hid = fluid.layers.fc(pt_concat,
+                                 size=self.hidden_size,
+                                 param_attr='t_fc.w',
+                                 bias_attr='t_fc.b')
+        # cosine of hidden layers
+        cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
+
+        if is_infer:
+            self._infer_results['query_pt_sim'] = cos_pos
+            return
+
+        self.nt_slots = self._sparse_data_var[2:3]
+        nt_embs = [
             fluid.embedding(
                 input=title, size=self.emb_shape, param_attr="emb")
             for title in self.nt_slots
         ]
-
-        # encode each embedding field with encoder
-        q_encodes = [
-            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
-        ]
-        pt_encodes = [
-            self.title_encoders[i].forward(emb)
-            for i, emb in enumerate(pt_embs)
-        ]
-        nt_encodes = [
+        nt_encodes = [
             self.title_encoders[i].forward(emb)
             for i, emb in enumerate(nt_embs)
         ]
-
-        # concat multi view for query, pos_title, neg_title
-        q_concat = fluid.layers.concat(q_encodes)
-        pt_concat = fluid.layers.concat(pt_encodes)
-        nt_concat = fluid.layers.concat(nt_encodes)
-
-        # projection of hidden layer
-        q_hid = fluid.layers.fc(q_concat,
-                                size=self.hidden_size,
-                                param_attr='q_fc.w',
-                                bias_attr='q_fc.b')
-        pt_hid = fluid.layers.fc(pt_concat,
-                                 size=self.hidden_size,
-                                 param_attr='t_fc.w',
-                                 bias_attr='t_fc.b')
-        nt_hid = fluid.layers.fc(nt_concat,
+        nt_concat = fluid.layers.concat(nt_encodes)
+        nt_hid = fluid.layers.fc(nt_concat,
                                  size=self.hidden_size,
                                  param_attr='t_fc.w',
                                  bias_attr='t_fc.b')
+        cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
 
-        # cosine of hidden layers
-        cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
-        cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
-
-        # pairwise hinge_loss
+        # pairwise hinge_loss
         loss_part1 = fluid.layers.elementwise_sub(
             tensor.fill_constant_batch_size_like(
                 input=cos_pos,
@@ -254,72 +196,16 @@ class Model(ModelBase):
                 input=loss_part2, shape=[-1, 1], value=0.0, dtype='float32'),
             loss_part2)
-        self.avg_cost = fluid.layers.mean(loss_part3)
+        self._cost = fluid.layers.mean(loss_part3)
         self.acc = self.get_acc(cos_neg, cos_pos)
-
-    def avg_loss(self):
-        self._cost = self.avg_cost
-
-    def metrics(self):
-        self._metrics["loss"] = self.avg_cost
+        self._metrics["loss"] = self._cost
         self._metrics["acc"] = self.acc
 
-    def train_net(self):
-        self.train_input()
-        self.net()
-        self.avg_loss()
-        self.metrics()
-
-    def optimizer(self):
-        learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
-                                            None, self._namespace)
-        optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
-        return optimizer
-
-    def infer_input(self):
-        res = self.input(is_train=False)
-        self._infer_data_var = res
-
-        self._infer_data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=self._infer_data_var,
-            capacity=64,
-            use_double_buffer=False,
-            iterable=False)
-
-    def infer_net(self):
-        self.infer_input()
-        # lookup embedding for each slot
-        q_embs = [
-            fluid.embedding(
-                input=query, size=self.emb_shape, param_attr="emb")
-            for query in self.q_slots
-        ]
-        pt_embs = [
-            fluid.embedding(
-                input=title, size=self.emb_shape, param_attr="emb")
-            for title in self.pt_slots
-        ]
-        # encode each embedding field with encoder
-        q_encodes = [
-            self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
-        ]
-        pt_encodes = [
-            self.title_encoders[i].forward(emb)
-            for i, emb in enumerate(pt_embs)
-        ]
-        # concat multi view for query, pos_title, neg_title
-        q_concat = fluid.layers.concat(q_encodes)
-        pt_concat = fluid.layers.concat(pt_encodes)
-        # projection of hidden layer
-        q_hid = fluid.layers.fc(q_concat,
-                                size=self.hidden_size,
-                                param_attr='q_fc.w',
-                                bias_attr='q_fc.b')
-        pt_hid = fluid.layers.fc(pt_concat,
-                                 size=self.hidden_size,
-                                 param_attr='t_fc.w',
-                                 bias_attr='t_fc.b')
-
-        # cosine of hidden layers
-        cos = fluid.layers.cos_sim(q_hid, pt_hid)
-        self._infer_results['query_pt_sim'] = cos
+    def get_acc(self, x, y):
+        less = tensor.cast(cf.less_than(x, y), dtype='float32')
+        label_ones = fluid.layers.fill_constant_batch_size_like(
+            input=x, dtype='float32', shape=[-1, 1], value=1.0)
+        correct = fluid.layers.reduce_sum(less)
+        total = fluid.layers.reduce_sum(label_ones)
+        acc = fluid.layers.elementwise_div(correct, total)
+        return acc
diff --git a/models/recall/gnn/config.yaml b/models/recall/gnn/config.yaml
index 50c6d401153a607a88d5eba713fc439303aad868..d200ddc37083ab68b1456ce7fd7bef4fb9870985 100755
--- a/models/recall/gnn/config.yaml
+++ b/models/recall/gnn/config.yaml
@@ -11,46 +11,71 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-evaluate:
-  workspace: "paddlerec.models.recall.gnn"
-  reader:
-    batch_size: 50
-    class: "{workspace}/evaluate_reader.py"
-    test_data_path: "{workspace}/data/test"
-train:
-  trainer:
-    # for cluster training
-    strategy: "async"
+# workspace
+workspace: "paddlerec.models.recall.gnn"
 
-  epochs: 2
-  workspace: "paddlerec.models.recall.gnn"
+# list of dataset
+dataset:
+- name: dataset_train # name of dataset to distinguish different datasets
+  batch_size: 100
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/train"
+  data_converter: "{workspace}/reader.py"
+- name: dataset_infer # name
+  batch_size: 50
+  type: DataLoader # or QueueDataset
+  data_path: "{workspace}/data/test"
+  data_converter: "{workspace}/evaluate_reader.py"
 
-  reader:
-    batch_size: 100
-    class: "{workspace}/reader.py"
-    train_data_path: "{workspace}/data/train"
-    dataset_class: "DataLoader"
+# hyper parameters of user-defined network
+hyper_parameters:
+  optimizer:
+    class: Adam
+    learning_rate: 0.001
+    decay_steps: 3
+    decay_rate: 0.1
+    l2: 0.00001
+  sparse_feature_nums: 43098
+  sparse_feature_dim: 100
+  corpus_size: 719470
+  gnn_propogation_steps: 1
 
-  model:
-    models: "{workspace}/model.py"
-    hyper_parameters:
-      use_DataLoader: True
-      config_path: "{workspace}/data/config.txt"
-      sparse_feature_dim: 100
-      gnn_propogation_steps: 1
-      learning_rate: 0.001
-      l2: 0.00001
-      decay_steps: 3
-      decay_rate: 0.1
-      optimizer: adam
+# select runner by name
+mode: runner1
+# config of each runner.
+# runner is a kind of paddle training class, which wraps the train/infer process.
+runner:
+- name: runner1
+  class: single_train
+  # num of epochs
+  epochs: 2
+  # device to run training or infer
+  device: cpu
+  save_checkpoint_interval: 1 # save model interval of epochs
+  save_inference_interval: 1 # save inference interval of epochs
+  save_checkpoint_path: "increment" # save checkpoint path
+  save_inference_path: "inference" # save inference path
+  save_inference_feed_varnames: [] # feed vars of save inference
+  save_inference_fetch_varnames: [] # fetch vars of save inference
+  init_model_path: "" # load model path
+  fetch_period: 10
+- name: runner2
+  class: single_infer
+  # num of epochs
+  epochs: 1
+  # device to run training or infer
+  device: cpu
+  fetch_period: 1
+  init_model_path: "increment/0" # load model path
 
-  save:
-    increment:
-      dirname: "increment"
-      epoch_interval: 1
-      save_last: True
-    inference:
-      dirname: "inference"
-      epoch_interval: 1
-      save_last: True
+# runner will run all the phase in each epoch
+phase:
+- name: phase1
+  model: "{workspace}/model.py" # user-defined model
+  dataset_name: dataset_train # select dataset by name
+  thread_num: 1
+#- name: phase2
+#  model: "{workspace}/model.py" # user-defined model
+#  dataset_name: dataset_infer # select dataset by name
+#  thread_num: 1
diff --git a/models/recall/gnn/evaluate_reader.py b/models/recall/gnn/evaluate_reader.py
index b26ea8fa9fc347ce402575104dcfa6de23aa80fc..74299972adedbc452044566460ea7282c2391a18 100755
--- a/models/recall/gnn/evaluate_reader.py
+++ b/models/recall/gnn/evaluate_reader.py
@@ -21,10 +21,9 @@
 from paddlerec.core.reader import Reader
 from paddlerec.core.utils import envs
 
 
-class EvaluateReader(Reader):
+class TrainReader(Reader):
     def init(self):
-        self.batch_size = envs.get_global_env("batch_size", None,
-                                              "evaluate.reader")
+        self.batch_size = envs.get_global_env("dataset.dataset_infer.batch_size")
         self.input = []
         self.length = None
diff --git a/models/recall/gnn/model.py b/models/recall/gnn/model.py
index 027fbb721131e203ed22485b4d8f9bd96b8ed3a3..055cea3ae15f584a2b36d0ea66f52b92dd1256fc 100755
--- a/models/recall/gnn/model.py
+++ b/models/recall/gnn/model.py
@@ -25,74 +25,59 @@ from paddlerec.core.model import Model as ModelBase
 
 
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)
-        self.init_config()
 
-    def init_config(self):
-        self._fetch_interval = 1
-        self.items_num, self.ins_num = self.config_read(
-            envs.get_global_env("hyper_parameters.config_path", None,
-                                self._namespace))
-        self.train_batch_size = envs.get_global_env("batch_size", None,
-                                                    "train.reader")
-        self.evaluate_batch_size = envs.get_global_env("batch_size", None,
-                                                       "evaluate.reader")
-        self.hidden_size = envs.get_global_env(
-            "hyper_parameters.sparse_feature_dim", None, self._namespace)
-        self.step = envs.get_global_env(
-            "hyper_parameters.gnn_propogation_steps", None, self._namespace)
-
-    def config_read(self, config_path=None):
-        if config_path is None:
-            raise ValueError(
-                "please set train.model.hyper_parameters.config_path at first")
-        with open(config_path, "r") as fin:
-            item_nums = int(fin.readline().strip())
-            ins_nums = int(fin.readline().strip())
-        return item_nums, ins_nums
-
-    def input(self, bs):
-        self.items = fluid.data(
+    def _init_hyper_parameters(self):
+        self.learning_rate = envs.get_global_env("hyper_parameters.optimizer.learning_rate")
+        self.decay_steps = envs.get_global_env("hyper_parameters.optimizer.decay_steps")
+        self.decay_rate = envs.get_global_env("hyper_parameters.optimizer.decay_rate")
+        self.l2 = envs.get_global_env("hyper_parameters.optimizer.l2")
+
+        self.dict_size = envs.get_global_env("hyper_parameters.sparse_feature_nums")
+        self.corpus_size = envs.get_global_env("hyper_parameters.corpus_size")
+
+        self.train_batch_size = envs.get_global_env("dataset.dataset_train.batch_size")
+        self.evaluate_batch_size = envs.get_global_env("dataset.dataset_infer.batch_size")
+
+        self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim")
+        self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps")
+
+    def input_data(self, is_infer=False, **kwargs):
+        if is_infer:
+            bs = self.evaluate_batch_size
+        else:
+            bs = self.train_batch_size
+        items = fluid.data(
             name="items", shape=[bs, -1],
             dtype="int64")  # [batch_size, uniq_max]
-        self.seq_index = fluid.data(
+        seq_index = fluid.data(
             name="seq_index", shape=[bs, -1, 2],
             dtype="int32")  # [batch_size, seq_max, 2]
-        self.last_index = fluid.data(
+        last_index = fluid.data(
             name="last_index", shape=[bs, 2],
             dtype="int32")  # [batch_size, 2]
-        self.adj_in = fluid.data(
+        adj_in = fluid.data(
             name="adj_in", shape=[bs, -1, -1],
             dtype="float32")  # [batch_size, seq_max, seq_max]
-        self.adj_out = fluid.data(
+        adj_out = fluid.data(
             name="adj_out", shape=[bs, -1, -1],
             dtype="float32")  # [batch_size, seq_max, seq_max]
-        self.mask = fluid.data(
+        mask = fluid.data(
             name="mask", shape=[bs, -1, 1],
             dtype="float32")  # [batch_size, seq_max, 1]
-        self.label = fluid.data(
+        label = fluid.data(
             name="label", shape=[bs, 1],
             dtype="int64")  # [batch_size, 1]
         res = [
-            self.items, self.seq_index, self.last_index, self.adj_in,
-            self.adj_out, self.mask, self.label
+            items, seq_index, last_index, adj_in, adj_out, mask, label
         ]
         return res
 
-    def train_input(self):
-        res = self.input(self.train_batch_size)
-        self._data_var = res
-
-        use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader",
-                                             False, self._namespace)
-
-        if self._platform != "LINUX" or use_dataloader:
-            self._data_loader = fluid.io.DataLoader.from_generator(
-                feed_list=self._data_var,
-                capacity=256,
-                use_double_buffer=False,
-                iterable=False)
-
-    def net(self, items_num, hidden_size, step, bs):
-        stdv = 1.0 / math.sqrt(hidden_size)
+    def net(self, inputs, is_infer=False):
+        if is_infer:
+            bs = self.evaluate_batch_size
+        else:
+            bs = self.train_batch_size
+
+        stdv = 1.0 / math.sqrt(self.hidden_size)
 
         def embedding_layer(input,
                             table_name,
@@ -100,22 +85,22 @@ class Model(ModelBase):
                             initializer_instance=None):
             emb = fluid.embedding(
                 input=input,
-                size=[items_num, emb_dim],
+                size=[self.dict_size, emb_dim],
                 param_attr=fluid.ParamAttr(
-                    name=table_name, initializer=initializer_instance), )
+                    name=table_name, initializer=initializer_instance))
             return emb
 
         sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
-        items_emb = embedding_layer(self.items, "emb", hidden_size,
+        items_emb = embedding_layer(inputs[0], "emb", self.hidden_size,
                                     sparse_initializer)
         pre_state = items_emb
-        for i in range(step):
+        for i in range(self.step):
             pre_state = layers.reshape(
-                x=pre_state, shape=[bs, -1, hidden_size])
+                x=pre_state, shape=[bs, -1, self.hidden_size])
             state_in = layers.fc(
                 input=pre_state,
                 name="state_in",
-                size=hidden_size,
+                size=self.hidden_size,
                 act=None,
                 num_flatten_dims=2,
                 param_attr=fluid.ParamAttr(
@@ -127,7 +112,7 @@ class Model(ModelBase):
             state_out = layers.fc(
                 input=pre_state,
                 name="state_out",
-                size=hidden_size,
+                size=self.hidden_size,
                 act=None,
                 num_flatten_dims=2,
                 param_attr=fluid.ParamAttr(
@@ -137,33 +122,32 @@ class Model(ModelBase):
                     initializer=fluid.initializer.Uniform(
                         low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
 
-            state_adj_in = layers.matmul(self.adj_in,
+            state_adj_in = layers.matmul(inputs[3],
                                          state_in)  # [batch_size, uniq_max, h]
-            state_adj_out = layers.matmul(
-                self.adj_out, state_out)  # [batch_size, uniq_max, h]
+            state_adj_out = layers.matmul(inputs[4], state_out)  # [batch_size, uniq_max, h]
 
             gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)
 
             gru_input = layers.reshape(
-                x=gru_input, shape=[-1, hidden_size * 2])
+                x=gru_input, shape=[-1, self.hidden_size * 2])
             gru_fc = layers.fc(input=gru_input,
                                name="gru_fc",
-                               size=3 * hidden_size,
+                               size=3 * self.hidden_size,
                                bias_attr=False)
             pre_state, _, _ = fluid.layers.gru_unit(
                 input=gru_fc,
                 hidden=layers.reshape(
-                    x=pre_state, shape=[-1, hidden_size]),
-                size=3 * hidden_size)
+                    x=pre_state, shape=[-1, self.hidden_size]),
+                size=3 * self.hidden_size)
 
-        final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
-        seq = layers.gather_nd(final_state, self.seq_index)
-        last = layers.gather_nd(final_state, self.last_index)
+        final_state = layers.reshape(pre_state, shape=[bs, -1, self.hidden_size])
+        seq = layers.gather_nd(final_state, inputs[1])
+        last = layers.gather_nd(final_state, inputs[2])
 
         seq_fc = layers.fc(
             input=seq,
            name="seq_fc",
-            size=hidden_size,
+            size=self.hidden_size,
             bias_attr=False,
             act=None,
             num_flatten_dims=2,
@@ -171,7 +155,7 @@ class Model(ModelBase):
                 low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
         last_fc = layers.fc(input=last,
                             name="last_fc",
-                            size=hidden_size,
+                            size=self.hidden_size,
                             bias_attr=False,
                             act=None,
                             num_flatten_dims=1,
@@ -184,7 +168,7 @@ class Model(ModelBase):
         add = layers.elementwise_add(seq_fc_t,
                                      last_fc)  # [seq_max, batch_size, h]
         b = layers.create_parameter(
-            shape=[hidden_size],
+            shape=[self.hidden_size],
             dtype='float32',
             default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
         add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]
@@ -202,7 +186,7 @@ class Model(ModelBase):
             bias_attr=False,
             param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                 low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
-        weight *= self.mask
+        weight *= inputs[5]
         weight_mask = layers.elementwise_mul(
             seq, weight, axis=0)  # [batch_size, seq_max, h]
         global_attention = layers.reduce_sum(
@@ -213,7 +197,7 @@ class Model(ModelBase):
         final_attention_fc = layers.fc(
             input=final_attention,
             name="final_attention_fc",
-            size=hidden_size,
+            size=self.hidden_size,
             bias_attr=False,
             act=None,
             param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
@@ -225,7 +209,7 @@ class Model(ModelBase):
         #     dtype="int64",
         #     persistable=True,
         #     name="all_vocab")
-        all_vocab = np.arange(1, items_num).reshape((-1)).astype('int32')
+        all_vocab = np.arange(1, self.dict_size).reshape((-1)).astype('int32')
         all_vocab = fluid.layers.cast(
             x=fluid.layers.assign(all_vocab), dtype='int64')
 
@@ -235,63 +219,34 @@ class Model(ModelBase):
                 name="emb",
                 initializer=fluid.initializer.Uniform(
                     low=-stdv, high=stdv)),
-            size=[items_num, hidden_size])  # [all_vocab, h]
+            size=[self.dict_size, self.hidden_size])  # [all_vocab, h]
         logits = layers.matmul(
             x=final_attention_fc, y=all_emb,
             transpose_y=True)  # [batch_size, all_vocab]
         softmax = layers.softmax_with_cross_entropy(
-            logits=logits, label=self.label)  # [batch_size, 1]
+            logits=logits, label=inputs[6])  # [batch_size, 1]
         self.loss = layers.reduce_mean(softmax)  # [1]
-        self.acc = layers.accuracy(input=logits, label=self.label, k=20)
+        self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)
 
-    def avg_loss(self):
         self._cost = self.loss
+        if is_infer:
+            self._infer_results['acc'] = self.acc
+            self._infer_results['loss'] = self.loss
+            return
 
-    def metrics(self):
-        self._metrics["LOSS"] = self.loss
+        self._metrics["LOSS"] = self.loss
         self._metrics["train_acc"] = self.acc
 
-    def train_net(self):
-        self.train_input()
-        self.net(self.items_num, self.hidden_size, self.step,
-                 self.train_batch_size)
-        self.avg_loss()
-        self.metrics()
 
     def optimizer(self):
-        learning_rate = envs.get_global_env("hyper_parameters.learning_rate",
-                                            None, self._namespace)
-        step_per_epoch = self.ins_num // self.train_batch_size
-        decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None,
-                                          self._namespace)
-        decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None,
-                                         self._namespace)
-        l2 = envs.get_global_env("hyper_parameters.l2", None, self._namespace)
+        step_per_epoch = self.corpus_size // self.train_batch_size
         optimizer = fluid.optimizer.Adam(
             learning_rate=fluid.layers.exponential_decay(
-                learning_rate=learning_rate,
-                decay_steps=decay_steps * step_per_epoch,
-                decay_rate=decay_rate),
+                learning_rate=self.learning_rate,
+                decay_steps=self.decay_steps * step_per_epoch,
+                decay_rate=self.decay_rate),
             regularization=fluid.regularizer.L2DecayRegularizer(
-                regularization_coeff=l2))
-
+                regularization_coeff=self.l2))
         return optimizer
 
-    def infer_input(self):
-        self._reader_namespace = "evaluate.reader"
-        res = self.input(self.evaluate_batch_size)
-        self._infer_data_var = res
-
-        self._infer_data_loader = fluid.io.DataLoader.from_generator(
-            feed_list=self._infer_data_var,
-            capacity=64,
-            use_double_buffer=False,
-            iterable=False)
-
-    def infer_net(self):
-        self.infer_input()
-        self.net(self.items_num, self.hidden_size, self.step,
-                 self.evaluate_batch_size)
-        self._infer_results['acc'] = self.acc
-        self._infer_results['loss'] = self.loss
diff --git a/models/recall/gnn/reader.py b/models/recall/gnn/reader.py
index 68170f09a7a7c84547a67f970b6e127de40b0ccc..7dfade76554565a0f5f59b0bd916b635338d14f4 100755
--- a/models/recall/gnn/reader.py
+++ b/models/recall/gnn/reader.py
@@ -23,9 +23,7 @@ from paddlerec.core.utils import envs
 
 
 class TrainReader(Reader):
     def init(self):
-        self.batch_size = envs.get_global_env("batch_size", None,
-                                              "train.reader")
-
+        self.batch_size = envs.get_global_env("dataset.dataset_train.batch_size")
         self.input = []
         self.length = None