Commit c782ddb7 authored by zhangwenhui03

Merge branch 'develop' into 'develop'

# Conflicts:
#   models/rank/din/reader.py
#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################
# Usage: submit.sh
......
@@ -15,10 +15,9 @@
 from __future__ import print_function
 from __future__ import unicode_literals
-import subprocess
-import sys
-import os
 import copy
+import os
+import subprocess
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.factory import TrainerFactory
......
@@ -29,4 +29,3 @@ class Engine:
     @abc.abstractmethod
     def run(self):
         pass
@@ -14,10 +14,11 @@
 from __future__ import print_function
 from __future__ import unicode_literals
-import subprocess
-import sys
-import os
 import copy
+import os
+import sys
+import subprocess
 from paddlerec.core.engine.engine import Engine
 from paddlerec.core.utils import envs
......
@@ -14,10 +14,11 @@
 from __future__ import print_function
 from __future__ import unicode_literals
-import subprocess
-import sys
-import os
 import copy
+import os
+import sys
+import subprocess
 from paddlerec.core.engine.engine import Engine
......
@@ -25,17 +25,8 @@ class Layer(object):
         """
         pass
-    def generate(self, mode, param):
-        """R
-        """
-        if mode == 'fluid':
-            return self.generate_fluid(param)
-        print('unsupport this mode: ' + mode)
-        return None, None
     @abc.abstractmethod
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         pass
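The old two-step dispatch (`generate(mode, param)` checking for `'fluid'` and forwarding to `generate_fluid`) collapses into a single abstract `generate(param)`. A minimal sketch of a subclass under the new interface; the `ConcatLayer` class and its config keys are illustrative, not part of this commit:

```python
import paddle.fluid as fluid

from paddlerec.core.layer import Layer


class ConcatLayer(Layer):  # hypothetical example layer, not from this diff
    """Concatenates the outputs of the layers named in config['input']."""

    def __init__(self, config):
        self._name = config['name']
        self._input = config['input']

    def generate(self, param):  # single entry point; no mode argument anymore
        inputs = [param['layer'][name] for name in self._input]
        output = fluid.layers.concat(input=inputs, axis=1)
        # Layers in this file return (variable, inference_param_dict) pairs.
        return output, {'inference_param': None}
```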
@@ -53,7 +53,7 @@ class Metric(object):
         pass
     @abc.abstractmethod
-    def get_result_to_string(self):
+    def __str__(self):
         """
         Return:
             result(string) : calculate result with string format, for output
......
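Renaming `get_result_to_string` to `__str__` lets a metric render itself through the normal Python string protocol, so `print(metric)` replaces `print(metric.get_result_to_string())`. A standalone sketch of the contract; the class below only mirrors the `Metric` interface and is not from the diff:

```python
class ExampleMetric(object):  # mirrors the Metric interface for illustration
    def __init__(self):
        self._auc = 0.5

    def get_result(self):
        """Raw metric values, as a dict."""
        return {'AUC': self._auc}

    def __str__(self):
        """Formerly get_result_to_string(): format results for output."""
        return "AUC=%.6f" % self.get_result()['AUC']


metric = ExampleMetric()
print(metric)  # prints "AUC=0.500000" via __str__
```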
@@ -13,8 +13,10 @@
 # limitations under the License.
 import math
 import numpy as np
 import paddle.fluid as fluid
 from paddlerec.core.metric import Metric
@@ -198,7 +200,7 @@ class AUCMetric(Metric):
         """ """
         return self._result
-    def get_result_to_string(self):
+    def __str__(self):
         """ """
         result = self.get_result()
         result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
......
@@ -47,7 +47,7 @@ class Model(object):
     def get_infer_results(self):
         return self._infer_results
-    def get_cost_op(self):
+    def get_avg_cost(self):
         """R
         """
         return self._cost
......
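`get_cost_op` becomes `get_avg_cost` here and at every call site below; the method returns the mean-reduced loss variable that the optimizer minimizes. A self-contained toy showing what that variable is; the little network is invented for illustration, only the `get_avg_cost`/`minimize` pairing comes from the diff:

```python
import paddle.fluid as fluid


def build_toy_net():
    """Stand-in for Model.train_net(): build a net and return its mean loss."""
    x = fluid.data(name="x", shape=[None, 4], dtype="float32")
    y = fluid.data(name="y", shape=[None, 1], dtype="float32")
    pred = fluid.layers.fc(input=x, size=1)
    cost = fluid.layers.square_error_cost(input=pred, label=y)
    return fluid.layers.mean(cost)  # what get_avg_cost() hands back


avg_cost = build_toy_net()
# The trainers below all follow this pattern after the rename:
fluid.optimizer.SGD(learning_rate=0.01).minimize(avg_cost)
```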
@@ -12,10 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import yaml
 import copy
 import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
+import yaml
 from paddlerec.core.model import Model
 from paddlerec.core.utils import table
......
@@ -13,10 +13,11 @@
 # limitations under the License.
 import paddle.fluid as fluid
 from paddlerec.core.layer import Layer
-class EmbeddingInputLayer(Layer):
+class EmbeddingFuseLayer(Layer):
     """R
     """
@@ -31,7 +32,7 @@ class EmbeddingFuseLayer(Layer):
         self._emb_dim = self._mf_dim + 3  # append show ctr lr
         self._emb_layers = []
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         show_clk = fluid.layers.concat(
@@ -63,7 +64,7 @@ class LabelInputLayer(Layer):
         self._data_type = config.get('data_type', "int64")
         self._label_idx = config['label_idx']
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
@@ -85,7 +86,7 @@ class TagInputLayer(Layer):
         self._dim = config.get('dim', 1)
         self._data_type = config['data_type']
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
@@ -107,7 +108,7 @@ class ParamLayer(Layer):
         self._data_type = config.get('data_type', 'float32')
         self._config = config
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}}
@@ -125,7 +126,7 @@ class SummaryLayer(Layer):
         self._data_type = config.get('data_type', 'float32')
         self._config = config
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}}
@@ -143,7 +144,7 @@ class NormalizetionLayer(Layer):
         self._summary = config['summary']
         self._table_id = config.get('table_id', -1)
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         input_layer = param['layer'][self._input[0]]
@@ -158,7 +159,7 @@ class NormalizetionLayer(Layer):
             'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}}
-class NeuralLayer(Layer):
+class FCLayer(Layer):
     """R
     """
@@ -171,7 +172,7 @@ class FCLayer(Layer):
         self._bias = config.get('bias', True)
         self._act_func = config.get('act_func', None)
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         param_layer = param['layer'][self._param]
@@ -199,7 +200,7 @@ class FCLayer(Layer):
             'table_id': param_layer.get('table_id', -1)}}
-class SigmoidLossLayer(Layer):
+class LogLossLayer(Layer):
     """R
     """
@@ -230,7 +231,7 @@ class LogLossLayer(Layer):
             }
         }
-    def generate_fluid(self, param):
+    def generate(self, param):
         """R
         """
         input_layer = param['layer'][self._input[0]]
......
@@ -12,14 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
-import sys
 import abc
+import os
 import time
+import sys
 import yaml
 from paddle import fluid
 from paddlerec.core.utils import envs
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
trainer implement.
↗ (single/cluster) CtrTrainer
Trainer
↗ (for single training) SingleTrainer/TDMSingleTrainer
↘ TranspilerTrainer → (for cluster training) ClusterTrainer/TDMClusterTrainer
↘ (for online learning training) OnlineLearningTrainer
"""
@@ -25,7 +25,6 @@ import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
 from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
 from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
-from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
 from paddlerec.core.utils import envs
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
@@ -83,7 +82,7 @@ class ClusterTrainer(TranspileTrainer):
         strategy = self.build_strategy()
         optimizer = fleet.distributed_optimizer(optimizer, strategy)
-        optimizer.minimize(self.model.get_cost_op())
+        optimizer.minimize(self.model.get_avg_cost())
         if fleet.is_server():
             context['status'] = 'server_pass'
@@ -115,7 +114,7 @@ class ClusterTrainer(TranspileTrainer):
         program = fluid.compiler.CompiledProgram(
             fleet.main_program).with_data_parallel(
-            loss_name=self.model.get_cost_op().name,
+            loss_name=self.model.get_avg_cost().name,
             build_strategy=self.strategy.get_build_strategy(),
             exec_strategy=self.strategy.get_execute_strategy())
......
@@ -11,9 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import os
-import numpy as np
+
+import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
 from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
@@ -22,7 +23,7 @@ from paddlerec.core.utils import envs
 from paddlerec.core.trainer import Trainer
-class CtrPaddleTrainer(Trainer):
+class CtrTrainer(Trainer):
     """R
     """
@@ -87,7 +88,7 @@ class CtrTrainer(Trainer):
         optimizer = self.model.optimizer()
         optimizer = fleet.distributed_optimizer(optimizer, strategy={"use_cvm": False})
-        optimizer.minimize(self.model.get_cost_op())
+        optimizer.minimize(self.model.get_avg_cost())
         if fleet.is_server():
             context['status'] = 'server_pass'
......
@@ -13,12 +13,12 @@
 # limitations under the License.
-import datetime
-import json
 import sys
 import time
-import numpy as np
+import json
+import datetime
+
+import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
 from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"):
     return wroker_numric_opt(value, env, "max")
-class CtrPaddleTrainer(Trainer):
+class CtrTrainer(Trainer):
     """R
     """
@@ -129,7 +129,7 @@ class CtrTrainer(Trainer):
             model = self._exector_context[executor['name']]['model']
             self._metrics.update(model.get_metrics())
             runnnable_scope.append(scope)
-            runnnable_cost_op.append(model.get_cost_op())
+            runnnable_cost_op.append(model.get_avg_cost())
             for var in model._data_var:
                 if var.name in data_var_name_dict:
                     continue
@@ -146,7 +146,7 @@ class CtrTrainer(Trainer):
             model = self._exector_context[executor['name']]['model']
             program = model._build_param['model']['train_program']
             if not executor['is_update_sparse']:
-                program._fleet_opt["program_configs"][str(id(model.get_cost_op().block.program))]["push_sparse"] = []
+                program._fleet_opt["program_configs"][str(id(model.get_avg_cost().block.program))]["push_sparse"] = []
             if 'train_thread_num' not in executor:
                 executor['train_thread_num'] = self.global_config['train_thread_num']
             with fluid.scope_guard(scope):
......
@@ -18,9 +18,9 @@ Training use fluid with one node only.
 from __future__ import print_function
-import datetime
 import os
 import time
+import datetime
 import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
-class ClusterTrainer(TranspileTrainer):
+class OnlineLearningTrainer(TranspileTrainer):
     def processor_register(self):
         role = PaddleCloudRoleMaker()
         fleet.init(role)
@@ -78,7 +78,7 @@ class OnlineLearningTrainer(TranspileTrainer):
         optimizer = self.model.optimizer()
         strategy = self.build_strategy()
         optimizer = fleet.distributed_optimizer(optimizer, strategy)
-        optimizer.minimize(self.model.get_cost_op())
+        optimizer.minimize(self.model.get_avg_cost())
         if fleet.is_server():
             context['status'] = 'server_pass'
......
@@ -17,14 +17,14 @@ Training use fluid with one node only.
 """
 from __future__ import print_function
-import logging
 import time
+import logging
+
 import paddle.fluid as fluid
 from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
 from paddlerec.core.utils import envs
-import numpy as np
 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger("fluid")
@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer):
         self.regist_context_processor('uninit', self.instance)
         self.regist_context_processor('init_pass', self.init)
         self.regist_context_processor('startup_pass', self.startup)
-        if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
+        if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None,
+                                                                  "train.reader") != "DataLoader":
             self.regist_context_processor('train_pass', self.dataset_train)
         else:
             self.regist_context_processor('train_pass', self.dataloader_train)
@@ -47,7 +48,7 @@ class SingleTrainer(TranspileTrainer):
     def init(self, context):
         self.model.train_net()
         optimizer = self.model.optimizer()
-        optimizer.minimize((self.model.get_cost_op()))
+        optimizer.minimize((self.model.get_avg_cost()))
         self.fetch_vars = []
         self.fetch_alias = []
@@ -74,7 +75,7 @@ class SingleTrainer(TranspileTrainer):
         program = fluid.compiler.CompiledProgram(
             fluid.default_main_program()).with_data_parallel(
-            loss_name=self.model.get_cost_op().name)
+            loss_name=self.model.get_avg_cost().name)
         metrics_varnames = []
         metrics_format = []
@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer):
                 fetch_info=self.fetch_alias,
                 print_period=self.fetch_period)
             end_time = time.time()
-            times = end_time-begin_time
-            print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins/times))
+            times = end_time - begin_time
+            print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins / times))
             self.save(i, "train", is_fleet=False)
         context['status'] = 'infer_pass'
......
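The `regist_context_processor` calls above wire up a small state machine: each handler does its phase of work and then writes the next state into `context['status']` (e.g. `train_pass` hands off to `infer_pass` at the end of the hunk above). A hedged, self-contained sketch of that dispatch loop; the real loop lives in the Trainer base class and is not shown in this diff:

```python
def run(processors):
    """Dispatch context['status'] to registered handlers until one exits."""
    context = {'status': 'uninit', 'is_exit': False}
    while not context['is_exit']:
        processors[context['status']](context)


# Toy handlers naming the transitions visible in the SingleTrainer hunks.
def instance(ctx): ctx['status'] = 'init_pass'
def init(ctx): ctx['status'] = 'startup_pass'
def startup(ctx): ctx['status'] = 'train_pass'
def train(ctx): ctx['status'] = 'infer_pass'
def infer(ctx): ctx['is_exit'] = True


run({'uninit': instance, 'init_pass': init, 'startup_pass': startup,
     'train_pass': train, 'infer_pass': infer})
```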
@@ -17,17 +17,16 @@ Training use fluid with one node only.
 """
 from __future__ import print_function
 import logging
+
 import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
-from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
-from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
 from paddlerec.core.utils import envs
 from paddlerec.core.trainers.cluster_trainer import ClusterTrainer
 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger("fluid")
 logger.setLevel(logging.INFO)
......
@@ -18,12 +18,11 @@ Training use fluid with one node only.
 from __future__ import print_function
 import logging
-import paddle.fluid as fluid
-from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
+import numpy as np
+import paddle.fluid as fluid
 from paddlerec.core.trainers.single_trainer import SingleTrainer
 from paddlerec.core.utils import envs
-import numpy as np
 logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
 logger = logging.getLogger("fluid")
......
@@ -147,8 +147,8 @@ class TranspileTrainer(Trainer):
         if not need_save(epoch_id, save_interval, False):
             return
         # print("save inference model is not supported now.")
         # return
         feed_varnames = envs.get_global_env(
             "save.inference.feed_varnames", None, namespace)
@@ -248,7 +248,7 @@ class TranspileTrainer(Trainer):
                 'evaluate_model_path', "", namespace='evaluate'))]
         is_return_numpy = envs.get_global_env(
             'is_return_numpy', True, namespace='evaluate')
         for (epoch, model_dir) in model_list:
             print("Begin to infer No.{} model, model_dir: {}".format(
......
@@ -14,7 +14,6 @@
 from __future__ import print_function
 import os
-import sys
 from paddlerec.core.utils.envs import lazy_instance_by_fliename
 from paddlerec.core.utils.envs import get_global_env
......
@@ -13,8 +13,8 @@
 # limitations under the License.
 import abc
-import time
 import datetime
+import time
 import paddle.fluid as fluid
@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs
 from paddlerec.core.utils import util as util
-class Dataset(object):
+class DatasetHolder(object):
     """
     Dataset Base
     """
@@ -62,7 +62,7 @@ class DatasetHolder(object):
         pass
-class TimeSplitDataset(Dataset):
+class TimeSplitDatasetHolder(DatasetHolder):
     """
     Dataset with time split dir. root_path/$DAY/$HOUR
     """
@@ -142,16 +142,6 @@ class TimeSplitDatasetHolder(DatasetHolder):
             data_time = data_time + datetime.timedelta(minutes=self._split_interval)
         return data_file_list
-class FluidTimeSplitDataset(TimeSplitDataset):
-    """
-    A Dataset with time split for PaddleFluid
-    """
-
-    def __init__(self, config):
-        """ """
-        TimeSplitDataset.__init__(self, config)
-
     def _alloc_dataset(self, file_list):
         """ """
         dataset = fluid.DatasetFactory().create_dataset(self._config['dataset_type'])
......
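`TimeSplitDatasetHolder` expects data laid out as `root_path/$DAY/$HOUR` and advances through it in fixed `split_interval` steps, as the `datetime.timedelta` line above shows. A standalone sketch of that path arithmetic (the helper function is illustrative, not the class's actual method):

```python
import datetime


def split_path_list(root_path, start, total_minutes, split_interval=30):
    """Yield one root_path/$DAY/$HOUR path per split interval."""
    data_time = start
    end = start + datetime.timedelta(minutes=total_minutes)
    while data_time < end:
        yield "%s/%s/%02d" % (root_path, data_time.strftime("%Y%m%d"),
                              data_time.hour)
        data_time = data_time + datetime.timedelta(minutes=split_interval)


start = datetime.datetime(2020, 5, 1, 0, 0)
for p in split_path_list("/data", start, 90):
    print(p)  # /data/20200501/00, /data/20200501/00, /data/20200501/01
```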
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 import sys
......
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import os
+from contextlib import closing
 import copy
-import sys
+import os
 import socket
-from contextlib import closing
+import sys
 global_envs = {}
......
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
@@ -28,12 +29,12 @@ class LocalFSClient(object):
     """
     Util for local disk file_system io
     """
     def __init__(self):
         """R
         """
         pass
     def write(self, content, path, mode):
         """
         write to file
@@ -43,7 +44,7 @@ class LocalFSClient(object):
             mode(string): w/a  w:clear_write  a:append_write
         """
         temp_dir = os.path.dirname(path)
         if not os.path.exists(temp_dir):
             os.makedirs(temp_dir)
         f = open(path, mode)
         f.write(content)
@@ -75,7 +76,7 @@ class LocalFSClient(object):
         """R
         """
         os.system("rm -rf " + path)
     def is_exist(self, path):
         """R
         """
@@ -94,13 +95,14 @@ class FileHandler(object):
     """
     A Smart file handler. auto judge local/afs by path
     """
     def __init__(self, config):
         """R
         """
         if 'fs_name' in config:
-            hadoop_home="$HADOOP_HOME"
+            hadoop_home = "$HADOOP_HOME"
             hdfs_configs = {
                 "hadoop.job.ugi": config['fs_ugi'],
                 "fs.default.name": config['fs_name']
             }
             self._hdfs_client = HDFSClient(hadoop_home, hdfs_configs)
@@ -131,7 +133,8 @@ class FileHandler(object):
             if mode.find('a') >= 0:
                 org_content = self._hdfs_client.cat(dest_path)
                 content = content + org_content
-            self._local_fs_client.write(content, temp_local_file, mode) #fleet hdfs_client only support upload, so write tmp file
+            self._local_fs_client.write(content, temp_local_file,
+                                        mode)  # fleet hdfs_client only support upload, so write tmp file
             self._hdfs_client.delete(dest_path + ".tmp")
             self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
             self._hdfs_client.delete(dest_path + ".bak")
@@ -139,7 +142,7 @@ class FileHandler(object):
             self._hdfs_client.rename(dest_path + ".tmp", dest_path)
         else:
             self._local_fs_client.write(content, dest_path, mode)
     def cat(self, path):
         """R
         """
@@ -148,7 +151,7 @@ class FileHandler(object):
             return hdfs_cat
         else:
             return self._local_fs_client.cat(path)
     def ls(self, path):
         """R
         """
@@ -160,7 +163,7 @@ class FileHandler(object):
             files = self._local_fs_client.ls(path)
             files = [path + '/' + fi for fi in files]  # absolute path
         return files
     def cp(self, org_path, dest_path):
         """R
         """
@@ -170,6 +173,6 @@ class FileHandler(object):
             return self._local_fs_client.cp(org_path, dest_path)
         if not org_is_afs and dest_is_afs:
             return self._hdfs_client.upload(dest_path, org_path)
         if org_is_afs and not dest_is_afs:
             return self._hdfs_client.download(org_path, dest_path)
         print("Not support hdfs cp currently")
@@ -12,16 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import copy
-import yaml
 class TableMeta(object):
     """
     Simple ParamTable Meta, Contain table_id
     """
     TableId = 1
     @staticmethod
     def alloc_new_table(table_id):
         """
......
@@ -12,11 +12,12 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import datetime
 import os
 import time
+import datetime
+
 from paddle import fluid
 from paddlerec.core.utils import fs as fs
......
@@ -153,7 +153,7 @@ class Model(object):
     def infer_net(self):
         pass
-    def get_cost_op(self):
+    def get_avg_cost(self):
         return self._cost
``` ```
......
@@ -13,15 +13,9 @@
 # limitations under the License.
 import paddle.fluid as fluid
-import math
-from paddlerec.core.utils import envs
 from paddlerec.core.model import Model as ModelBase
-import paddle.fluid as fluid
-import paddle.fluid.layers.nn as nn
-import paddle.fluid.layers.tensor as tensor
-import paddle.fluid.layers.control_flow as cf
 class Model(ModelBase):
     def __init__(self, config):
@@ -36,7 +30,7 @@ class Model(ModelBase):
     def train_net(self):
         """ network definition """
         data = fluid.data(name="input", shape=[None, self.max_len], dtype='int64')
         label = fluid.data(name="label", shape=[None, 1], dtype='int64')
         seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
@@ -60,12 +54,12 @@ class Model(ModelBase):
         prediction = fluid.layers.fc(input=[fc_1], size=self.class_dim, act="softmax")
         cost = fluid.layers.cross_entropy(input=prediction, label=label)
         avg_cost = fluid.layers.mean(x=cost)
         acc = fluid.layers.accuracy(input=prediction, label=label)
         self.cost = avg_cost
         self._metrics["acc"] = acc
-    def get_cost_op(self):
+    def get_avg_cost(self):
         return self.cost
     def get_metrics(self):
......
@@ -12,31 +12,22 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import re
 import sys
-import collections
-import os
-import six
-import time
-import numpy as np
-import paddle.fluid as fluid
-import paddle
-import csv
-import io
 from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
 class TrainReader(Reader):
     def init(self):
         pass
     def _process_line(self, l):
         l = l.strip().split(" ")
         data = l[0:10]
         seq_len = l[10:11]
         label = l[11:]
         return data, label, seq_len
     def generate_sample(self, line):
         def data_iter():
@@ -47,6 +38,7 @@ class TrainReader(Reader):
             data = [int(i) for i in data]
             label = [int(i) for i in label]
             seq_len = [int(i) for i in seq_len]
-            print >>sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
+            print >> sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
             yield [('data', data), ('label', label), ('seq_len', seq_len)]
         return data_iter
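The reader contract visible in this hunk: `generate_sample(line)` returns a generator function whose items are `(slot_name, int_list)` pairs, one list per declared slot. A self-contained sketch of that shape with the Reader base class stripped away:

```python
def generate_sample(line):
    """Mimic the TrainReader above: parse one line into slot/value pairs."""
    def data_iter():
        fields = line.strip().split(" ")
        data = [int(i) for i in fields[0:10]]
        seq_len = [int(i) for i in fields[10:11]]
        label = [int(i) for i in fields[11:]]
        yield [('data', data), ('label', label), ('seq_len', seq_len)]
    return data_iter


sample = "1 2 3 4 5 6 7 8 9 10 10 0"
for record in generate_sample(sample)():
    print(record)  # [('data', [1..10]), ('label', [0]), ('seq_len', [10])]
```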
@@ -12,30 +12,27 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import paddle.fluid as fluid
-import math
-from paddlerec.core.utils import envs
-from paddlerec.core.model import Model as ModelBase
 import paddle.fluid as fluid
 import paddle.fluid.layers.nn as nn
 import paddle.fluid.layers.tensor as tensor
 import paddle.fluid.layers.control_flow as cf
+from paddlerec.core.model import Model as ModelBase
+from paddlerec.core.utils import envs
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)
         self.cost = None
         self.metrics = {}
-        self.vocab_text_size = 11447#envs.get_global_env("vocab_text_size", None, self._namespace)
-        self.vocab_tag_size = 4#envs.get_global_env("vocab_tag_size", None, self._namespace)
-        self.emb_dim = 10#envs.get_global_env("emb_dim", None, self._namespace)
-        self.hid_dim = 1000#envs.get_global_env("hid_dim", None, self._namespace)
-        self.win_size = 5#envs.get_global_env("win_size", None, self._namespace)
-        self.margin = 0.1#envs.get_global_env("margin", None, self._namespace)
-        self.neg_size = 3#envs.get_global_env("neg_size", None, self._namespace)
-        print self.emb_dim
+        self.vocab_text_size = envs.get_global_env("vocab_text_size", None, self._namespace)
+        self.vocab_tag_size = envs.get_global_env("vocab_tag_size", None, self._namespace)
+        self.emb_dim = envs.get_global_env("emb_dim", None, self._namespace)
+        self.hid_dim = envs.get_global_env("hid_dim", None, self._namespace)
+        self.win_size = envs.get_global_env("win_size", None, self._namespace)
+        self.margin = envs.get_global_env("margin", None, self._namespace)
+        self.neg_size = envs.get_global_env("neg_size", None, self._namespace)
     def train_net(self):
         """ network definition """
@@ -92,18 +89,16 @@ class Model(ModelBase):
         self.metrics["correct"] = correct
         self.metrics["cos_pos"] = cos_pos
-    def get_cost_op(self):
+    def get_avg_cost(self):
         return self.cost
     def get_metrics(self):
         return self.metrics
     def optimizer(self):
-        learning_rate = 0.01#envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
+        learning_rate = envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
         sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
-        #sgd_optimizer.minimize(avg_cost)
         return sgd_optimizer
     def infer_net(self, parameter_list):
         self.train_net()
@@ -12,26 +12,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import re
 import sys
-import collections
-import os
-import six
-import time
 import numpy as np
-import paddle.fluid as fluid
-import paddle
-import csv
-import io
 from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
 class TrainReader(Reader):
     def init(self):
         pass
     def _process_line(self, l):
         tag_size = 4
         neg_size = 3
         l = l.strip().split(",")
@@ -54,10 +47,7 @@ class TrainReader(Reader):
                     neg_index = rand_i
                     neg_tag.append(neg_index)
                     sum_n += 1
-        # if n > 0 and len(text) > n:
-        #     #yield None
-        #     return None, None, None
         return text, pos_tag, neg_tag
     def generate_sample(self, line):
         def data_iter():
@@ -66,4 +56,5 @@ class TrainReader(Reader):
                 yield None
                 return
             yield [('text', text), ('pos_tag', pos_tag), ('neg_tag', neg_tag)]
         return data_iter
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import math
 import paddle.fluid as fluid
 from paddlerec.core.utils import envs
@@ -25,11 +24,12 @@ class Model(ModelBase):
     def input(self):
         TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
         Neg = envs.get_global_env("hyper_parameters.NEG", None, self._namespace)
         self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
         self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
-        self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i in range(Neg)]
+        self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i
+                         in range(Neg)]
         self._data_var.append(self.query)
         self._data_var.append(self.doc_pos)
         for input in self.doc_negs:
@@ -38,40 +38,40 @@ class Model(ModelBase):
         if self._platform != "LINUX":
             self._data_loader = fluid.io.DataLoader.from_generator(
                 feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
     def net(self, is_infer=False):
         hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
         hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace)
         def fc(data, hidden_layers, hidden_acts, names):
             fc_inputs = [data]
             for i in range(len(hidden_layers)):
-                xavier=fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
+                xavier = fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
                 out = fluid.layers.fc(input=fc_inputs[-1],
                                       size=hidden_layers[i],
                                       act=hidden_acts[i],
                                       param_attr=xavier,
                                       bias_attr=xavier,
                                       name=names[i])
                 fc_inputs.append(out)
             return fc_inputs[-1]
         query_fc = fc(self.query, hidden_layers, hidden_acts, ['query_l1', 'query_l2', 'query_l3'])
         doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
         self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
         if is_infer:
             return
         R_Q_D_ns = []
         for i, doc_neg in enumerate(self.doc_negs):
-            doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, ['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
+            doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts,
+                              ['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
             R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
         concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
         prob = fluid.layers.softmax(concat_Rs, axis=1)
-        hit_prob = fluid.layers.slice(prob, axes=[0,1], starts=[0,0], ends=[4, 1])
+        hit_prob = fluid.layers.slice(prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
         loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
         self.avg_cost = fluid.layers.mean(x=loss)
@@ -101,10 +101,10 @@ class Model(ModelBase):
         self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
         self._infer_data_var = [self.query, self.doc_pos]
         self._infer_data_loader = fluid.io.DataLoader.from_generator(
             feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
     def infer_net(self):
         self.infer_input()
         self.net(is_infer=True)
         self.infer_results()
@@ -14,7 +14,6 @@
 from __future__ import print_function
 from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
 class EvaluateReader(Reader):
......
@@ -11,10 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from __future__ import print_function
 from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
 class TrainReader(Reader):
@@ -37,7 +37,7 @@ class TrainReader(Reader):
             neg_docs = []
             for i in range(len(features) - 2):
                 feature_names.append('doc_neg_' + str(i))
-                neg_docs.append(map(float, features[i+2].split(',')))
+                neg_docs.append(map(float, features[i + 2].split(',')))
             yield zip(feature_names, [query] + [pos_doc] + neg_docs)
......
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "begin to prepare data"
......
@@ -11,18 +11,15 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-import io
-import copy
-import random
 from paddlerec.core.reader import Reader
 from paddlerec.core.utils import envs
 class EvaluateReader(Reader):
     def init(self):
         self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
         self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
         self.all_slots = []
         for i in range(self.query_slots):
@@ -52,6 +49,7 @@ class EvaluateReader(Reader):
                     if visit:
                         self._all_slots_dict[slot][0] = False
                     else:
                         output[index][1].append(padding)
             yield output
         return data_iter
@@ -14,10 +14,12 @@
 import random
 class Dataset:
     def __init__(self):
         pass
 class SyntheticDataset(Dataset):
     def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000):
         # ids are randomly generated
@@ -39,7 +41,7 @@ class SyntheticDataset(Dataset):
             for i in range(self.query_slot_num):
                 qslot = generate_ids(self.ids_per_slot,
                                      self.sparse_feature_dim)
                 qslot = [str(fea) + ':' + str(i) for fea in qslot]
                 query_slots += qslot
             for i in range(self.title_slot_num):
                 pt_slot = generate_ids(self.ids_per_slot,
@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset):
                 for i in range(self.title_slot_num):
                     nt_slot = generate_ids(self.ids_per_slot,
                                            self.sparse_feature_dim)
-                    nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in nt_slot]
+                    nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in
+                               nt_slot]
                     neg_title_slots += nt_slot
                 yield query_slots + pos_title_slots + neg_title_slots
             else:
@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset):
     def test(self):
         return self._reader_creator(False)
 if __name__ == '__main__':
     sparse_feature_dim = 1000001
     query_slots = 1
@@ -75,7 +79,7 @@ if __name__ == '__main__':
     dataset = SyntheticDataset(sparse_feature_dim, query_slots, title_slots, dataset_size)
     train_reader = dataset.train()
     test_reader = dataset.test()
     with open("data/train/train.txt", 'w') as fout:
         for data in train_reader():
             fout.write(' '.join(data))
......
@@ -12,16 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-import numpy as np
-import math
 import paddle.fluid as fluid
-import paddle.fluid.layers as layers
 import paddle.fluid.layers.tensor as tensor
 import paddle.fluid.layers.control_flow as cf
+
 from paddlerec.core.utils import envs
 from paddlerec.core.model import Model as ModelBase
 class BowEncoder(object):
     """ bow-encoder """
@@ -97,13 +95,14 @@ class SimpleEncoderFactory(object):
         rnn_encode = GrnnEncoder(hidden_size=enc_hid_size)
         return rnn_encode
 class Model(ModelBase):
     def __init__(self, config):
         ModelBase.__init__(self, config)
         self.init_config()
     def init_config(self):
         self._fetch_interval = 1
         query_encoder = envs.get_global_env("hyper_parameters.query_encoder", None, self._namespace)
         title_encoder = envs.get_global_env("hyper_parameters.title_encoder", None, self._namespace)
         query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim", None, self._namespace)
@@ -115,19 +114,19 @@ class Model(ModelBase):
             factory.create(query_encoder, query_encode_dim)
             for i in range(query_slots)
         ]
         self.title_encoders = [
             factory.create(title_encoder, title_encode_dim)
             for i in range(title_slots)
         ]
         self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
         self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim", None, self._namespace)
         self.emb_shape = [self.emb_size, self.emb_dim]
         self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
         self.margin = 0.1
def input(self, is_train=True): def input(self, is_train=True):
self.q_slots = [ self.q_slots = [
fluid.data( fluid.data(
name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64') name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.query_encoders)) for i in range(len(self.query_encoders))
...@@ -138,22 +137,23 @@ class Model(ModelBase): ...@@ -138,22 +137,23 @@ class Model(ModelBase):
for i in range(len(self.title_encoders)) for i in range(len(self.title_encoders))
] ]
if not is_train: if not is_train:
return self.q_slots + self.pt_slots return self.q_slots + self.pt_slots
self.nt_slots = [ self.nt_slots = [
fluid.data( fluid.data(
name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1, dtype='int64') name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1,
dtype='int64')
for i in range(len(self.title_encoders)) for i in range(len(self.title_encoders))
] ]
return self.q_slots + self.pt_slots + self.nt_slots return self.q_slots + self.pt_slots + self.nt_slots
def train_input(self): def train_input(self):
res = self.input() res = self.input()
self._data_var = res self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace) use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
if self._platform != "LINUX" or use_dataloader: if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator( self._data_loader = fluid.io.DataLoader.from_generator(
...@@ -161,15 +161,15 @@ class Model(ModelBase): ...@@ -161,15 +161,15 @@ class Model(ModelBase):
def get_acc(self, x, y): def get_acc(self, x, y):
less = tensor.cast(cf.less_than(x, y), dtype='float32') less = tensor.cast(cf.less_than(x, y), dtype='float32')
label_ones = fluid.layers.fill_constant_batch_size_like( label_ones = fluid.layers.fill_constant_batch_size_like(
input=x, dtype='float32', shape=[-1, 1], value=1.0) input=x, dtype='float32', shape=[-1, 1], value=1.0)
correct = fluid.layers.reduce_sum(less) correct = fluid.layers.reduce_sum(less)
total = fluid.layers.reduce_sum(label_ones) total = fluid.layers.reduce_sum(label_ones)
acc = fluid.layers.elementwise_div(correct, total) acc = fluid.layers.elementwise_div(correct, total)
return acc return acc
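For intuition, get_acc above is just the fraction of pairs in which the positive title outscores the negative one; a minimal NumPy sketch of the same computation (illustrative only, not part of the commit):

import numpy as np

def pairwise_acc(cos_neg, cos_pos):
    # cast(less_than(x, y)) -> 1.0 where cos_neg < cos_pos, else 0.0
    less = (np.asarray(cos_neg) < np.asarray(cos_pos)).astype('float32')
    # reduce_sum(less) / reduce_sum(label_ones) == mean over the batch
    return less.sum() / less.shape[0]

print(pairwise_acc([0.1, 0.5, 0.2, 0.9], [0.8, 0.6, 0.1, 0.95]))  # 0.75
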
def net(self): def net(self):
q_embs = [ q_embs = [
fluid.embedding( fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb") input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots for query in self.q_slots
...@@ -184,8 +184,8 @@ class Model(ModelBase): ...@@ -184,8 +184,8 @@ class Model(ModelBase):
input=title, size=self.emb_shape, param_attr="emb") input=title, size=self.emb_shape, param_attr="emb")
for title in self.nt_slots for title in self.nt_slots
] ]
# encode each embedding field with encoder # encode each embedding field with encoder
q_encodes = [ q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs) self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
] ]
...@@ -201,7 +201,7 @@ class Model(ModelBase): ...@@ -201,7 +201,7 @@ class Model(ModelBase):
pt_concat = fluid.layers.concat(pt_encodes) pt_concat = fluid.layers.concat(pt_encodes)
nt_concat = fluid.layers.concat(nt_encodes) nt_concat = fluid.layers.concat(nt_encodes)
# projection of hidden layer # projection of hidden layer
q_hid = fluid.layers.fc(q_concat, q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size, size=self.hidden_size,
param_attr='q_fc.w', param_attr='q_fc.w',
...@@ -219,7 +219,7 @@ class Model(ModelBase): ...@@ -219,7 +219,7 @@ class Model(ModelBase):
cos_pos = fluid.layers.cos_sim(q_hid, pt_hid) cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
cos_neg = fluid.layers.cos_sim(q_hid, nt_hid) cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
# pairwise hinge_loss # pairwise hinge_loss
loss_part1 = fluid.layers.elementwise_sub( loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like( tensor.fill_constant_batch_size_like(
input=cos_pos, input=cos_pos,
...@@ -236,7 +236,7 @@ class Model(ModelBase): ...@@ -236,7 +236,7 @@ class Model(ModelBase):
loss_part2) loss_part2)
self.avg_cost = fluid.layers.mean(loss_part3) self.avg_cost = fluid.layers.mean(loss_part3)
self.acc = self.get_acc(cos_neg, cos_pos) self.acc = self.get_acc(cos_neg, cos_pos)
def avg_loss(self): def avg_loss(self):
self._cost = self.avg_cost self._cost = self.avg_cost
...@@ -253,19 +253,19 @@ class Model(ModelBase): ...@@ -253,19 +253,19 @@ class Model(ModelBase):
def optimizer(self): def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace) learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate) optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
return optimizer return optimizer
def infer_input(self): def infer_input(self):
res = self.input(is_train=False) res = self.input(is_train=False)
self._infer_data_var = res self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator( self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self): def infer_net(self):
self.infer_input() self.infer_input()
# lookup embedding for each slot # lookup embedding for each slot
q_embs = [ q_embs = [
fluid.embedding( fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb") input=query, size=self.emb_shape, param_attr="emb")
...@@ -276,14 +276,14 @@ class Model(ModelBase): ...@@ -276,14 +276,14 @@ class Model(ModelBase):
input=title, size=self.emb_shape, param_attr="emb") input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots for title in self.pt_slots
] ]
# encode each embedding field with encoder # encode each embedding field with encoder
q_encodes = [ q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs) self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
] ]
pt_encodes = [ pt_encodes = [
self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs) self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
] ]
# concat multi view for query, pos_title, neg_title # concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes) q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes) pt_concat = fluid.layers.concat(pt_encodes)
# projection of hidden layer # projection of hidden layer
......
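The loss assembled in net() above (loss_part1 through loss_part3) is the standard pairwise hinge loss with a fixed margin of 0.1, i.e. mean(max(0, margin - cos_pos + cos_neg)); a NumPy sketch with illustrative inputs:

import numpy as np

def hinge_loss(cos_pos, cos_neg, margin=0.1):
    # loss_part1 = margin - cos_pos; loss_part2 = loss_part1 + cos_neg
    # loss_part3 = max(0, loss_part2); avg_cost = mean(loss_part3)
    cos_pos = np.asarray(cos_pos, dtype='float32')
    cos_neg = np.asarray(cos_neg, dtype='float32')
    return np.maximum(0.0, margin - cos_pos + cos_neg).mean()

# A pair stops contributing loss once the positive title beats the
# negative one by at least the margin.
print(hinge_loss([0.9, 0.6], [0.1, 0.58]))  # mean(0, 0.08) -> 0.04
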
...@@ -11,18 +11,15 @@ ...@@ -11,18 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model") self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model") self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.all_slots = [] self.all_slots = []
for i in range(self.query_slots): for i in range(self.query_slots):
...@@ -55,6 +52,7 @@ class TrainReader(Reader): ...@@ -55,6 +52,7 @@ class TrainReader(Reader):
if visit: if visit:
self._all_slots_dict[slot][0] = False self._all_slots_dict[slot][0] = False
else: else:
output[index][1].append(padding) output[index][1].append(padding)
yield output yield output
return data_iter return data_iter
...@@ -13,19 +13,19 @@ ...@@ -13,19 +13,19 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class EvaluateReader(Reader): class EvaluateReader(Reader):
def init(self): def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129', all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301'] '205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int) self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id): for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i] self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line): def generate_sample(self, line):
""" """
...@@ -39,25 +39,26 @@ class EvaluateReader(Reader): ...@@ -39,25 +39,26 @@ class EvaluateReader(Reader):
features = line.strip().split(',') features = line.strip().split(',')
ctr = int(features[1]) ctr = int(features[1])
cvr = int(features[2]) cvr = int(features[2])
padding = 0 padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict] output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]: for elem in features[4:]:
field_id,feat_id = elem.strip().split(':') field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict: if field_id not in self.all_field_id_dict:
continue continue
self.all_field_id_dict[field_id][0] = True self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1] index = self.all_field_id_dict[field_id][1]
output[index][1].append(int(feat_id)) output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict: for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id] visited, index = self.all_field_id_dict[field_id]
if visited: if visited:
self.all_field_id_dict[field_id][0] = False self.all_field_id_dict[field_id][0] = False
else: else:
output[index][1].append(padding) output[index][1].append(padding)
output.append(('ctr', [ctr])) output.append(('ctr', [ctr]))
output.append(('cvr', [cvr])) output.append(('cvr', [cvr]))
yield output yield output
return reader return reader
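A plain-Python sketch of what this reader does per line: column 1 is the click label, column 2 the conversion label, and columns 4 onward are field_id:feat_id pairs (what columns 0 and 3 hold is not visible in the hunk). Fields absent from a sample get a single 0 as padding:

ALL_FIELD_ID = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122',
                '124', '125', '126', '127', '128', '129', '205', '206', '207',
                '210', '216', '508', '509', '702', '853', '301']

def parse_line(line):
    # Same bookkeeping as generate_sample: one (field_id, [feat_ids]) slot
    # per field, padded with a single 0 when the field is absent.
    field_index = {fid: i for i, fid in enumerate(ALL_FIELD_ID)}
    features = line.strip().split(',')
    ctr, cvr = int(features[1]), int(features[2])
    output = [(fid, []) for fid in ALL_FIELD_ID]
    for elem in features[4:]:
        field_id, feat_id = elem.strip().split(':')
        if field_id in field_index:
            output[field_index[field_id]][1].append(int(feat_id))
    for fid, feats in output:
        if not feats:
            feats.append(0)  # padding
    output.append(('ctr', [ctr]))
    output.append(('cvr', [cvr]))
    return output

print(parse_line('0,1,1,x,101:12,101:13,205:7')[:2])  # [('101', [12, 13]), ('109_14', [0])]
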
...@@ -11,21 +11,22 @@ ...@@ -11,21 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129', all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301'] '205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int) self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id): for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i] self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line): def generate_sample(self, line):
""" """
...@@ -37,30 +38,31 @@ class TrainReader(Reader): ...@@ -37,30 +38,31 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format This function needs to be implemented by the user, based on data format
""" """
features = line.strip().split(',') features = line.strip().split(',')
#ctr = list(map(int, features[1])) # ctr = list(map(int, features[1]))
#cvr = list(map(int, features[2])) # cvr = list(map(int, features[2]))
ctr = int(features[1]) ctr = int(features[1])
cvr = int(features[2]) cvr = int(features[2])
padding = 0 padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict] output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]: for elem in features[4:]:
field_id,feat_id = elem.strip().split(':') field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict: if field_id not in self.all_field_id_dict:
continue continue
self.all_field_id_dict[field_id][0] = True self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1] index = self.all_field_id_dict[field_id][1]
#feat_id = list(map(int, feat_id)) # feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id)) output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict: for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id] visited, index = self.all_field_id_dict[field_id]
if visited: if visited:
self.all_field_id_dict[field_id][0] = False self.all_field_id_dict[field_id][0] = False
else: else:
output[index][1].append(padding) output[index][1].append(padding)
output.append(('ctr', [ctr])) output.append(('ctr', [ctr]))
output.append(('cvr', [cvr])) output.append(('cvr', [cvr]))
yield output yield output
return reader return reader
...@@ -12,83 +12,84 @@ ...@@ -12,83 +12,84 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
import numpy as np
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
def fc(self,tag, data, out_dim, active='prelu'): def fc(self, tag, data, out_dim, active='prelu'):
init_stddev = 1.0 init_stddev = 1.0
scales = 1.0 / np.sqrt(data.shape[1]) scales = 1.0 / np.sqrt(data.shape[1])
p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag, p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag,
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=init_stddev * scales)) initializer=fluid.initializer.NormalInitializer(loc=0.0,
scale=init_stddev * scales))
b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1)) b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
out = fluid.layers.fc(input=data, out = fluid.layers.fc(input=data,
size=out_dim, size=out_dim,
act=active, act=active,
param_attr=p_attr, param_attr=p_attr,
bias_attr =b_attr, bias_attr=b_attr,
name=tag) name=tag)
return out return out
def input_data(self): def input_data(self):
sparse_input_ids = [ sparse_input_ids = [
fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0,23) fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0, 23)
] ]
label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64") label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64") label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
inputs = sparse_input_ids + [label_ctr] + [label_cvr] inputs = sparse_input_ids + [label_ctr] + [label_cvr]
self._data_var.extend(inputs) self._data_var.extend(inputs)
return inputs return inputs
def net(self, inputs, is_infer=False): def net(self, inputs, is_infer=False):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace) vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
embed_size = envs.get_global_env("hyper_parameters.embed_size", None, self._namespace) embed_size = envs.get_global_env("hyper_parameters.embed_size", None, self._namespace)
emb = [] emb = []
for data in inputs[0:-2]: for data in inputs[0:-2]:
feat_emb = fluid.embedding(input=data, feat_emb = fluid.embedding(input=data,
size=[vocab_size, embed_size], size=[vocab_size, embed_size],
param_attr=fluid.ParamAttr(name='dis_emb', param_attr=fluid.ParamAttr(name='dis_emb',
learning_rate=5, learning_rate=5,
initializer=fluid.initializer.Xavier(fan_in=embed_size,fan_out=embed_size) initializer=fluid.initializer.Xavier(
), fan_in=embed_size, fan_out=embed_size)
is_sparse=True) ),
field_emb = fluid.layers.sequence_pool(input=feat_emb,pool_type='sum') is_sparse=True)
field_emb = fluid.layers.sequence_pool(input=feat_emb, pool_type='sum')
emb.append(field_emb) emb.append(field_emb)
concat_emb = fluid.layers.concat(emb, axis=1) concat_emb = fluid.layers.concat(emb, axis=1)
# ctr # ctr
active = 'relu' active = 'relu'
ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active) ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active)
ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active) ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active)
ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax') ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax')
# cvr # cvr
cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active) cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active) cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
cvr_out = self.fc('cvr_out', cvr_fc2, 2,'softmax') cvr_out = self.fc('cvr_out', cvr_fc2, 2, 'softmax')
ctr_clk = inputs[-2] ctr_clk = inputs[-2]
ctcvr_buy = inputs[-1] ctcvr_buy = inputs[-1]
ctr_prop_one = fluid.layers.slice(ctr_out, axes=[1], starts=[1], ends=[2]) ctr_prop_one = fluid.layers.slice(ctr_out, axes=[1], starts=[1], ends=[2])
cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2]) cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2])
ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one) ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one)
ctcvr_prop = fluid.layers.concat(input=[1-ctcvr_prop_one,ctcvr_prop_one], axis = 1) ctcvr_prop = fluid.layers.concat(input=[1 - ctcvr_prop_one, ctcvr_prop_one], axis=1)
auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk) auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk)
auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy) auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy)
...@@ -98,27 +99,23 @@ class Model(ModelBase): ...@@ -98,27 +99,23 @@ class Model(ModelBase):
self._infer_results["AUC_ctcvr"] = auc_ctcvr self._infer_results["AUC_ctcvr"] = auc_ctcvr
return return
loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk) loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk)
loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy) loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy)
cost = loss_ctr + loss_ctcvr cost = loss_ctr + loss_ctcvr
avg_cost = fluid.layers.mean(cost) avg_cost = fluid.layers.mean(cost)
self._cost = avg_cost self._cost = avg_cost
self._metrics["AUC_ctr"] = auc_ctr self._metrics["AUC_ctr"] = auc_ctr
self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr
self._metrics["AUC_ctcvr"] = auc_ctcvr self._metrics["AUC_ctcvr"] = auc_ctcvr
self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr
def train_net(self): def train_net(self):
input_data = self.input_data() input_data = self.input_data()
self.net(input_data) self.net(input_data)
def infer_net(self): def infer_net(self):
self._infer_data_var = self.input_data() self._infer_data_var = self.input_data()
self._infer_data_loader = fluid.io.DataLoader.from_generator( self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self.net(self._infer_data_var, is_infer=True) self.net(self._infer_data_var, is_infer=True)
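The key line in net() above is the ESMM factorization ctcvr_prop_one = pCTR * pCVR, trained over all impressions so the CVR tower never sees a biased, click-only sample. A NumPy sketch of the probability plumbing (values illustrative): slice out P(y=1) from each softmax head, multiply, then re-concatenate into a two-column distribution for the AUC op:

import numpy as np

def ctcvr_prob(ctr_out, cvr_out):
    # ctr_out / cvr_out: softmax outputs of shape (N, 2); column 1 = P(y=1).
    ctr_prop_one = ctr_out[:, 1:2]                 # like fluid.layers.slice
    cvr_prop_one = cvr_out[:, 1:2]
    ctcvr_prop_one = ctr_prop_one * cvr_prop_one   # pCTCVR = pCTR * pCVR
    return np.concatenate([1 - ctcvr_prop_one, ctcvr_prop_one], axis=1)

ctr = np.array([[0.7, 0.3], [0.1, 0.9]])
cvr = np.array([[0.5, 0.5], [0.8, 0.2]])
print(ctcvr_prob(ctr, cvr))  # pCTCVR column: [0.15, 0.18]
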
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader): class TrainReader(Reader):
...@@ -44,8 +43,8 @@ class TrainReader(Reader): ...@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0] label_marital = [1, 0]
elif int(l[0]) == 1: elif int(l[0]) == 1:
label_marital = [0, 1] label_marital = [0, 1]
#label_income = np.array(label_income) # label_income = np.array(label_income)
#label_marital = np.array(label_marital) # label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"] feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital]) yield zip(feature_name, [data] + [label_income] + [label_marital])
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -37,22 +36,21 @@ class Model(ModelBase): ...@@ -37,22 +36,21 @@ class Model(ModelBase):
if is_infer: if is_infer:
self._infer_data_var = [input_data, label_income, label_marital] self._infer_data_var = [input_data, label_income, label_marital]
self._infer_data_loader = fluid.io.DataLoader.from_generator( self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self._data_var.extend([input_data, label_income, label_marital]) self._data_var.extend([input_data, label_income, label_marital])
# f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper # f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
expert_outputs = [] expert_outputs = []
for i in range(0, expert_num): for i in range(0, expert_num):
expert_output = fluid.layers.fc(input=input_data, expert_output = fluid.layers.fc(input=input_data,
size=expert_size, size=expert_size,
act='relu', act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0), bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='expert_' + str(i)) name='expert_' + str(i))
expert_outputs.append(expert_output) expert_outputs.append(expert_output)
expert_concat = fluid.layers.concat(expert_outputs, axis=1) expert_concat = fluid.layers.concat(expert_outputs, axis=1)
expert_concat = fluid.layers.reshape(expert_concat,[-1, expert_num, expert_size]) expert_concat = fluid.layers.reshape(expert_concat, [-1, expert_num, expert_size])
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
output_layers = [] output_layers = []
for i in range(0, gate_num): for i in range(0, gate_num):
...@@ -62,52 +60,53 @@ class Model(ModelBase): ...@@ -62,52 +60,53 @@ class Model(ModelBase):
bias_attr=fluid.ParamAttr(learning_rate=1.0), bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='gate_' + str(i)) name='gate_' + str(i))
# f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x)) # f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
cur_gate_expert = fluid.layers.elementwise_mul(expert_concat, cur_gate, axis=0) cur_gate_expert = fluid.layers.elementwise_mul(expert_concat, cur_gate, axis=0)
cur_gate_expert = fluid.layers.reduce_sum(cur_gate_expert, dim=1) cur_gate_expert = fluid.layers.reduce_sum(cur_gate_expert, dim=1)
# Build tower layer # Build tower layer
cur_tower = fluid.layers.fc(input=cur_gate_expert, cur_tower = fluid.layers.fc(input=cur_gate_expert,
size=tower_size, size=tower_size,
act='relu', act='relu',
name='task_layer_' + str(i)) name='task_layer_' + str(i))
out = fluid.layers.fc(input=cur_tower, out = fluid.layers.fc(input=cur_tower,
size=2, size=2,
act='softmax', act='softmax',
name='out_' + str(i)) name='out_' + str(i))
output_layers.append(out) output_layers.append(out)
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer: if is_infer:
self._infer_results["AUC_income"] = auc_income self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital self._infer_results["AUC_marital"] = auc_marital
return return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
avg_cost_income = fluid.layers.mean(x=cost_income) avg_cost_income = fluid.layers.mean(x=cost_income)
avg_cost_marital = fluid.layers.mean(x=cost_marital) avg_cost_marital = fluid.layers.mean(x=cost_marital)
cost = avg_cost_income + avg_cost_marital cost = avg_cost_income + avg_cost_marital
self._cost = cost self._cost = cost
self._metrics["AUC_income"] = auc_income self._metrics["AUC_income"] = auc_income
self._metrics["BATCH_AUC_income"] = batch_auc_1 self._metrics["BATCH_AUC_income"] = batch_auc_1
self._metrics["AUC_marital"] = auc_marital self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2 self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self): def train_net(self):
self.MMOE() self.MMOE()
def infer_net(self): def infer_net(self):
self.MMOE(is_infer=True) self.MMOE(is_infer=True)
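The gate/expert mixing in the loop above follows the MMoE equations quoted in the comments; a NumPy sketch of f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x)), assuming the same broadcasting as elementwise_mul(expert_concat, cur_gate, axis=0):

import numpy as np

def mmoe_mix(expert_concat, gate):
    # expert_concat: (N, expert_num, expert_size); gate: (N, expert_num),
    # already softmaxed. The gate broadcasts over expert_size, then
    # reduce_sum(dim=1) collapses the expert axis.
    return (expert_concat * gate[:, :, None]).sum(axis=1)  # (N, expert_size)

experts = np.arange(2 * 3 * 4, dtype='float32').reshape(2, 3, 4)  # N=2, 3 experts
gate = np.array([[1.0, 0.0, 0.0], [0.2, 0.3, 0.5]], dtype='float32')
print(mmoe_mix(experts, gate))  # row 0 selects expert 0 exactly
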
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader): class TrainReader(Reader):
...@@ -44,8 +43,8 @@ class TrainReader(Reader): ...@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0] label_marital = [1, 0]
elif int(l[0]) == 1: elif int(l[0]) == 1:
label_marital = [0, 1] label_marital = [0, 1]
#label_income = np.array(label_income) # label_income = np.array(label_income)
#label_marital = np.array(label_marital) # label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"] feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital]) yield zip(feature_name, [data] + [label_income] + [label_marital])
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -33,65 +32,65 @@ class Model(ModelBase): ...@@ -33,65 +32,65 @@ class Model(ModelBase):
input_data = fluid.data(name="input", shape=[-1, feature_size], dtype="float32") input_data = fluid.data(name="input", shape=[-1, feature_size], dtype="float32")
label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0) label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0) label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
if is_infer: if is_infer:
self._infer_data_var = [input_data, label_income, label_marital] self._infer_data_var = [input_data, label_income, label_marital]
self._infer_data_loader = fluid.io.DataLoader.from_generator( self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self._data_var.extend([input_data, label_income, label_marital]) self._data_var.extend([input_data, label_income, label_marital])
bottom_output = fluid.layers.fc(input=input_data, bottom_output = fluid.layers.fc(input=input_data,
size=bottom_size, size=bottom_size,
act='relu', act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0), bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='bottom_output') name='bottom_output')
# Build tower layer from bottom layer # Build tower layer from bottom layer
output_layers = [] output_layers = []
for index in range(tower_nums): for index in range(tower_nums):
tower_layer = fluid.layers.fc(input=bottom_output, tower_layer = fluid.layers.fc(input=bottom_output,
size=tower_size, size=tower_size,
act='relu', act='relu',
name='task_layer_' + str(index)) name='task_layer_' + str(index))
output_layer = fluid.layers.fc(input=tower_layer, output_layer = fluid.layers.fc(input=tower_layer,
size=2, size=2,
act='softmax', act='softmax',
name='output_layer_' + str(index)) name='output_layer_' + str(index))
output_layers.append(output_layer) output_layers.append(output_layer)
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer: if is_infer:
self._infer_results["AUC_income"] = auc_income self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital self._infer_results["AUC_marital"] = auc_marital
return return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1) cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
self._cost = avg_cost self._cost = avg_cost
self._metrics["AUC_income"] = auc_income self._metrics["AUC_income"] = auc_income
self._metrics["BATCH_AUC_income"] = batch_auc_1 self._metrics["BATCH_AUC_income"] = batch_auc_1
self._metrics["AUC_marital"] = auc_marital self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2 self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self): def train_net(self):
self.model() self.model()
def infer_net(self): def infer_net(self):
self.model(is_infer=True) self.model(is_infer=True)
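For contrast with MMoE above, this share-bottom model feeds every tower the same bottom representation, with no per-task gates; a toy NumPy sketch of the forward shape flow (all sizes and weights are illustrative placeholders, not the config values):

import numpy as np

rng = np.random.default_rng(0)

def relu(a):
    return np.maximum(a, 0.0)

def softmax(a):
    e = np.exp(a - a.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

x = rng.normal(size=(4, 16)).astype('float32')   # (batch, feature_size)
bottom = relu(x @ rng.normal(size=(16, 8)))      # one shared bottom layer
outs = []
for _ in range(2):                               # income / marital towers
    tower = relu(bottom @ rng.normal(size=(8, 4)))
    outs.append(softmax(tower @ rng.normal(size=(4, 2))))
print([o.shape for o in outs])                   # [(4, 2), (4, 2)]
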
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
......
...@@ -11,18 +11,19 @@ ...@@ -11,18 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import math import math
import sys import os
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from collections import Counter
import os from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
...@@ -45,7 +46,7 @@ class TrainReader(Reader): ...@@ -45,7 +46,7 @@ class TrainReader(Reader):
self.label_feat_names = target + dense_feat_names + sparse_feat_names self.label_feat_names = target + dense_feat_names + sparse_feat_names
self.cat_feat_idx_dict_list = [{} for _ in range(26)] self.cat_feat_idx_dict_list = [{} for _ in range(26)]
# TODO: set vocabulary dictionary # TODO: set vocabulary dictionary
vocab_dir = envs.get_global_env("feat_dict_name", None, "train.reader") vocab_dir = envs.get_global_env("feat_dict_name", None, "train.reader")
for i in range(26): for i in range(26):
...@@ -53,7 +54,7 @@ class TrainReader(Reader): ...@@ -53,7 +54,7 @@ class TrainReader(Reader):
for line in open( for line in open(
os.path.join(vocab_dir, 'C' + str(i + 1) + '.txt')): os.path.join(vocab_dir, 'C' + str(i + 1) + '.txt')):
self.cat_feat_idx_dict_list[i][line.strip()] = lookup_idx self.cat_feat_idx_dict_list[i][line.strip()] = lookup_idx
lookup_idx += 1 lookup_idx += 1
def _process_line(self, line): def _process_line(self, line):
features = line.rstrip('\n').split('\t') features = line.rstrip('\n').split('\t')
...@@ -71,20 +72,21 @@ class TrainReader(Reader): ...@@ -71,20 +72,21 @@ class TrainReader(Reader):
if idx == 2 else math.log(1 + float(features[idx]))) if idx == 2 else math.log(1 + float(features[idx])))
for idx in self.cat_idx_: for idx in self.cat_idx_:
if features[idx] == '' or features[ if features[idx] == '' or features[
idx] not in self.cat_feat_idx_dict_list[idx - 14]: idx] not in self.cat_feat_idx_dict_list[idx - 14]:
label_feat_list[idx].append(0) label_feat_list[idx].append(0)
else: else:
label_feat_list[idx].append(self.cat_feat_idx_dict_list[ label_feat_list[idx].append(self.cat_feat_idx_dict_list[
idx - 14][features[idx]]) idx - 14][features[idx]])
label_feat_list[0].append(int(features[0])) label_feat_list[0].append(int(features[0]))
return label_feat_list return label_feat_list
def generate_sample(self, line): def generate_sample(self, line):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
label_feat_list = self._process_line(line) label_feat_list = self._process_line(line)
yield list(zip(self.label_feat_names, label_feat_list)) yield list(zip(self.label_feat_names, label_feat_list))
return data_iter return data_iter
\ No newline at end of file
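A sketch of the vocabulary lookup this reader builds: one id dictionary per categorical column C1..C26, read from C<i>.txt under feat_dict_name, with empty or unseen values falling back to 0. The initial value of lookup_idx sits outside the hunk; 1 is assumed here so 0 stays reserved for the fallback:

import os

def build_cat_vocab(vocab_dir, start_idx=1):
    # One dict per categorical column; ids are assigned in file order.
    vocabs = []
    for i in range(26):
        lookup_idx = start_idx
        d = {}
        for line in open(os.path.join(vocab_dir, 'C' + str(i + 1) + '.txt')):
            d[line.strip()] = lookup_idx
            lookup_idx += 1
        vocabs.append(d)
    return vocabs

def encode_cat(vocabs, col, raw_value):
    # Empty or out-of-vocabulary values map to 0, as in _process_line.
    return vocabs[col].get(raw_value, 0) if raw_value else 0
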
...@@ -12,17 +12,18 @@ ...@@ -12,17 +12,18 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from collections import OrderedDict
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
from collections import OrderedDict
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
def init_network(self): def init_network(self):
self.cross_num = envs.get_global_env("hyper_parameters.cross_num", None, self._namespace) self.cross_num = envs.get_global_env("hyper_parameters.cross_num", None, self._namespace)
self.dnn_hidden_units = envs.get_global_env("hyper_parameters.dnn_hidden_units", None, self._namespace) self.dnn_hidden_units = envs.get_global_env("hyper_parameters.dnn_hidden_units", None, self._namespace)
...@@ -49,7 +50,7 @@ class Model(ModelBase): ...@@ -49,7 +50,7 @@ class Model(ModelBase):
self.net_input = None self.net_input = None
self.loss = None self.loss = None
def _create_embedding_input(self, data_dict): def _create_embedding_input(self, data_dict):
# sparse embedding # sparse embedding
sparse_emb_dict = OrderedDict((name, fluid.embedding( sparse_emb_dict = OrderedDict((name, fluid.embedding(
...@@ -77,7 +78,7 @@ class Model(ModelBase): ...@@ -77,7 +78,7 @@ class Model(ModelBase):
net_input = fluid.layers.concat([dense_input, sparse_input], axis=-1) net_input = fluid.layers.concat([dense_input, sparse_input], axis=-1)
return net_input return net_input
def _deep_net(self, input, hidden_units, use_bn=False, is_test=False): def _deep_net(self, input, hidden_units, use_bn=False, is_test=False):
for units in hidden_units: for units in hidden_units:
input = fluid.layers.fc(input=input, size=units) input = fluid.layers.fc(input=input, size=units)
...@@ -94,7 +95,7 @@ class Model(ModelBase): ...@@ -94,7 +95,7 @@ class Model(ModelBase):
[input_dim], dtype='float32', name=prefix + "_b") [input_dim], dtype='float32', name=prefix + "_b")
xw = fluid.layers.reduce_sum(x * w, dim=1, keep_dim=True) # (N, 1) xw = fluid.layers.reduce_sum(x * w, dim=1, keep_dim=True) # (N, 1)
return x0 * xw + b + x, w return x0 * xw + b + x, w
def _cross_net(self, input, num_cross_layers): def _cross_net(self, input, num_cross_layers):
x = x0 = input x = x0 = input
l2_reg_cross_list = [] l2_reg_cross_list = []
...@@ -105,10 +106,10 @@ class Model(ModelBase): ...@@ -105,10 +106,10 @@ class Model(ModelBase):
fluid.layers.concat( fluid.layers.concat(
l2_reg_cross_list, axis=-1)) l2_reg_cross_list, axis=-1))
return x, l2_reg_cross_loss return x, l2_reg_cross_loss
def _l2_loss(self, w): def _l2_loss(self, w):
return fluid.layers.reduce_sum(fluid.layers.square(w)) return fluid.layers.reduce_sum(fluid.layers.square(w))
def train_net(self): def train_net(self):
self.init_network() self.init_network()
self.target_input = fluid.data( self.target_input = fluid.data(
...@@ -117,14 +118,14 @@ class Model(ModelBase): ...@@ -117,14 +118,14 @@ class Model(ModelBase):
for feat_name in self.feat_dims_dict: for feat_name in self.feat_dims_dict:
data_dict[feat_name] = fluid.data( data_dict[feat_name] = fluid.data(
name=feat_name, shape=[None, 1], dtype='float32') name=feat_name, shape=[None, 1], dtype='float32')
self.net_input = self._create_embedding_input(data_dict) self.net_input = self._create_embedding_input(data_dict)
deep_out = self._deep_net(self.net_input, self.dnn_hidden_units, self.dnn_use_bn, False) deep_out = self._deep_net(self.net_input, self.dnn_hidden_units, self.dnn_use_bn, False)
cross_out, l2_reg_cross_loss = self._cross_net(self.net_input, cross_out, l2_reg_cross_loss = self._cross_net(self.net_input,
self.cross_num) self.cross_num)
last_out = fluid.layers.concat([deep_out, cross_out], axis=-1) last_out = fluid.layers.concat([deep_out, cross_out], axis=-1)
logit = fluid.layers.fc(last_out, 1) logit = fluid.layers.fc(last_out, 1)
...@@ -140,7 +141,6 @@ class Model(ModelBase): ...@@ -140,7 +141,6 @@ class Model(ModelBase):
input=prob_2d, label=label_int, slide_steps=0) input=prob_2d, label=label_int, slide_steps=0)
self._metrics["AUC"] = auc_var self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var self._metrics["BATCH_AUC"] = batch_auc_var
# logloss # logloss
logloss = fluid.layers.log_loss(self.prob, self.target_input) logloss = fluid.layers.log_loss(self.prob, self.target_input)
......
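_cross_layer above is the whole of DCN's explicit feature crossing: x_{l+1} = x0 * (x_l . w) + b + x_l, one extra polynomial degree per layer at O(d) parameters. A NumPy sketch of _cross_net stacking it (random weights stand in for the learned ones):

import numpy as np

def cross_net(x0, num_layers, seed=0):
    # x_{l+1} = x0 * (x_l . w_l) + b_l + x_l, exactly as in _cross_layer.
    rng = np.random.default_rng(seed)
    d = x0.shape[1]
    x = x0
    for _ in range(num_layers):
        w = rng.normal(size=d).astype('float32')
        b = np.zeros(d, dtype='float32')
        xw = (x * w).sum(axis=1, keepdims=True)  # (N, 1), reduce_sum keep_dim=True
        x = x0 * xw + b + x
    return x

print(cross_net(np.ones((2, 4), dtype='float32'), num_layers=3).shape)  # (2, 4)
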
...@@ -11,15 +11,18 @@ ...@@ -11,15 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
...@@ -35,7 +38,7 @@ class TrainReader(Reader): ...@@ -35,7 +38,7 @@ class TrainReader(Reader):
self.categorical_range_ = range(14, 40) self.categorical_range_ = range(14, 40)
# load preprocessed feature dict # load preprocessed feature dict
self.feat_dict_name = envs.get_global_env("feat_dict_name", None, "train.reader") self.feat_dict_name = envs.get_global_env("feat_dict_name", None, "train.reader")
self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb')) self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb'))
def _process_line(self, line): def _process_line(self, line):
features = line.rstrip('\n').split('\t') features = line.rstrip('\n').split('\t')
...@@ -59,13 +62,14 @@ class TrainReader(Reader): ...@@ -59,13 +62,14 @@ class TrainReader(Reader):
feat_value.append(1.0) feat_value.append(1.0)
label = [int(features[0])] label = [int(features[0])]
return feat_idx, feat_value, label return feat_idx, feat_value, label
def generate_sample(self, line): def generate_sample(self, line):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
feat_idx, feat_value, label = self._process_line(line) feat_idx, feat_value, label = self._process_line(line)
yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)] yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)]
return data_iter return data_iter
\ No newline at end of file
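The reader emits two parallel lists per sample, feat_idx and feat_value: ids into one shared embedding table, paired with 1.0 for categorical fields and the (normalized) raw number for continuous ones. A toy illustration with a hypothetical feature dictionary (the real feat_dict_ comes from the preprocessing pickle; the key scheme below is invented for the example):

# Toy sketch: 2 continuous + 2 categorical fields -> parallel id/value lists.
feat_dict = {'I1': 1, 'I2': 2, 'C1=a': 3, 'C1=b': 4, 'C2=x': 5}  # hypothetical
sample = {'I1': 0.5, 'I2': 0.25, 'C1': 'a', 'C2': 'x'}
feat_idx = [feat_dict['I1'], feat_dict['I2'],
            feat_dict['C1=' + sample['C1']], feat_dict['C2=' + sample['C2']]]
feat_value = [sample['I1'], sample['I2'], 1.0, 1.0]
print(feat_idx, feat_value)  # [1, 2, 3, 5] [0.5, 0.25, 1.0, 1.0]
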
...@@ -12,9 +12,10 @@ ...@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid
import math import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
...@@ -28,26 +29,27 @@ class Model(ModelBase): ...@@ -28,26 +29,27 @@ class Model(ModelBase):
is_distributed = True if envs.get_trainer() == "CtrTrainer" else False is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace) sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace) sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
# ------------------------- network input -------------------------- # ------------------------- network input --------------------------
num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace) num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64') # None * num_field(default:39) raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field],
dtype='int64') # None * num_field(default:39)
raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field
self.label = fluid.data(name='label', shape=[None, 1], dtype='float32') # None * 1 self.label = fluid.data(name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx,[-1, 1]) # (None * num_field) * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1
feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1]) # None * num_field * 1 feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
# ------------------------- set _data_var -------------------------- # ------------------------- set _data_var --------------------------
self._data_var.append(raw_feat_idx) self._data_var.append(raw_feat_idx)
self._data_var.append(raw_feat_value) self._data_var.append(raw_feat_value)
self._data_var.append(self.label) self._data_var.append(self.label)
if self._platform != "LINUX": if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator( self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
#------------------------- first order term -------------------------- # ------------------------- first order term --------------------------
reg = envs.get_global_env("hyper_parameters.reg", 1e-4, self._namespace) reg = envs.get_global_env("hyper_parameters.reg", 1e-4, self._namespace)
first_weights_re = fluid.embedding( first_weights_re = fluid.embedding(
...@@ -65,7 +67,7 @@ class Model(ModelBase): ...@@ -65,7 +67,7 @@ class Model(ModelBase):
first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1 first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1
y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1) y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1)
#------------------------- second order term -------------------------- # ------------------------- second order term --------------------------
feat_embeddings_re = fluid.embedding( feat_embeddings_re = fluid.embedding(
input=feat_idx, input=feat_idx,
...@@ -80,12 +82,12 @@ class Model(ModelBase): ...@@ -80,12 +82,12 @@ class Model(ModelBase):
feat_embeddings = fluid.layers.reshape( feat_embeddings = fluid.layers.reshape(
feat_embeddings_re, feat_embeddings_re,
shape=[-1, num_field, shape=[-1, num_field,
sparse_feature_dim]) # None * num_field * embedding_size sparse_feature_dim]) # None * num_field * embedding_size
feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
# sum_square part # sum_square part
summed_features_emb = fluid.layers.reduce_sum(feat_embeddings, summed_features_emb = fluid.layers.reduce_sum(feat_embeddings,
1) # None * embedding_size 1) # None * embedding_size
summed_features_emb_square = fluid.layers.square( summed_features_emb_square = fluid.layers.square(
summed_features_emb) # None * embedding_size summed_features_emb) # None * embedding_size
...@@ -99,13 +101,12 @@ class Model(ModelBase): ...@@ -99,13 +101,12 @@ class Model(ModelBase):
summed_features_emb_square - squared_sum_features_emb, 1, summed_features_emb_square - squared_sum_features_emb, 1,
keep_dim=True) # None * 1 keep_dim=True) # None * 1
# ------------------------- DNN --------------------------
#------------------------- DNN --------------------------
layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace) layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
act = envs.get_global_env("hyper_parameters.act", None, self._namespace) act = envs.get_global_env("hyper_parameters.act", None, self._namespace)
y_dnn = fluid.layers.reshape(feat_embeddings, y_dnn = fluid.layers.reshape(feat_embeddings,
[-1, num_field * sparse_feature_dim]) [-1, num_field * sparse_feature_dim])
for s in layer_sizes: for s in layer_sizes:
y_dnn = fluid.layers.fc( y_dnn = fluid.layers.fc(
input=y_dnn, input=y_dnn,
...@@ -127,28 +128,28 @@ class Model(ModelBase): ...@@ -127,28 +128,28 @@ class Model(ModelBase):
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormalInitializer( initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_))) loc=0.0, scale=init_value_)))
#------------------------- DeepFM -------------------------- # ------------------------- DeepFM --------------------------
self.predict = fluid.layers.sigmoid(y_first_order + y_second_order + y_dnn) self.predict = fluid.layers.sigmoid(y_first_order + y_second_order + y_dnn)
def train_net(self): def train_net(self):
self.deepfm_net() self.deepfm_net()
#------------------------- Cost(logloss) -------------------------- # ------------------------- Cost(logloss) --------------------------
cost = fluid.layers.log_loss(input=self.predict, label=self.label) cost = fluid.layers.log_loss(input=self.predict, label=self.label)
avg_cost = fluid.layers.reduce_sum(cost) avg_cost = fluid.layers.reduce_sum(cost)
self._cost = avg_cost self._cost = avg_cost
#------------------------- Metric(Auc) -------------------------- # ------------------------- Metric(Auc) --------------------------
predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1) predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
label_int = fluid.layers.cast(self.label, 'int64') label_int = fluid.layers.cast(self.label, 'int64')
auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d, auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
label=label_int, label=label_int,
slide_steps=0) slide_steps=0)
self._metrics["AUC"] = auc_var self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var self._metrics["BATCH_AUC"] = batch_auc_var
...@@ -158,4 +159,4 @@ class Model(ModelBase): ...@@ -158,4 +159,4 @@ class Model(ModelBase):
return optimizer return optimizer
def infer_net(self, parameter_list): def infer_net(self, parameter_list):
self.deepfm_net() self.deepfm_net()
\ No newline at end of file
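The "sum_square part" / "square_sum part" in deepfm_net is the classic FM identity, which collapses the O(num_field^2) pairwise interactions into two O(num_field) passes: sum_{i<j} <e_i, e_j> = 0.5 * (||sum_i e_i||^2 - sum_i ||e_i||^2). A NumPy check (each e_i here already folds in the feature value, as feat_embeddings * feat_value does above):

import numpy as np

rng = np.random.default_rng(0)
emb = rng.normal(size=(39, 10))  # one embedding vector per field

# Pairwise-interaction definition, O(num_field^2):
pairwise = sum(emb[i] @ emb[j] for i in range(39) for j in range(i + 1, 39))

# FM identity used in deepfm_net, O(num_field):
summed = emb.sum(axis=0)
fm = 0.5 * (summed @ summed - (emb * emb).sum())

print(np.allclose(pairwise, fm))  # True
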
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
...
@@ -13,24 +13,28 @@
# limitations under the License.

from __future__ import print_function

-from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
-import numpy as np
import os
import random

try:
    import cPickle as pickle
except ImportError:
    import pickle

+import numpy as np
+
+from paddlerec.core.reader import Reader
+from paddlerec.core.utils import envs


class TrainReader(Reader):
    def init(self):
        self.train_data_path = envs.get_global_env("train_data_path", None, "train.reader")
        self.res = []
        self.max_len = 0

        data_file_list = os.listdir(self.train_data_path)
        for i in range(0, len(data_file_list)):
            train_data_file = os.path.join(self.train_data_path, data_file_list[i])
            with open(train_data_file, "r") as fin:
                for line in fin:
@@ -43,9 +47,6 @@ class TrainReader(Reader):
        self.batch_size = envs.get_global_env("batch_size", 32, "train.reader")
        self.group_size = self.batch_size * 20
    def _process_line(self, line):
        line = line.strip().split(';')
        hist = line[0].split()
@@ -54,22 +55,22 @@ class TrainReader(Reader):
        cate = [int(i) for i in cate]
        return [hist, cate, [int(line[2])], [int(line[3])], [float(line[4])]]

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def data_iter():
            # feat_idx, feat_value, label = self._process_line(line)
            yield self._process_line(line)

        return data_iter

    def pad_batch_data(self, input, max_len):
        res = np.array([x + [0] * (max_len - len(x)) for x in input])
        res = res.astype("int64").reshape([-1, max_len])
        return res
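`pad_batch_data` right-pads ragged histories with zeros up to the batch maximum. A standalone sketch of the same logic:

```python
import numpy as np

def pad_batch_data(input, max_len):
    # right-pad each sequence with 0 and stack into an int64 matrix
    res = np.array([x + [0] * (max_len - len(x)) for x in input])
    return res.astype("int64").reshape([-1, max_len])

batch = [[3, 7], [5, 1, 4], [9]]
print(pad_batch_data(batch, max_len=3))
# [[3 7 0]
#  [5 1 4]
#  [9 0 0]]
```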
    def make_data(self, b):
        max_len = max(len(x[0]) for x in b)
        item = self.pad_batch_data([x[0] for x in b], max_len)
@@ -77,7 +78,7 @@ class TrainReader(Reader):
        len_array = [len(x[0]) for x in b]
        mask = np.array(
            [[0] * x + [-1e9] * (max_len - x) for x in len_array]).reshape(
                [-1, max_len, 1])
        target_item_seq = np.array(
            [[x[2]] * max_len for x in b]).astype("int64").reshape([-1, max_len])
        target_cat_seq = np.array(
@@ -89,7 +90,7 @@ class TrainReader(Reader):
                target_item_seq[i], target_cat_seq[i]
            ])
        return res

    def batch_reader(self, reader, batch_size, group_size):
        def batch_reader():
            bg = []
@@ -111,7 +112,7 @@ class TrainReader(Reader):
                    yield self.make_data(b)

        return batch_reader

    def base_read(self, file_dir):
        res = []
        for train_file in file_dir:
@@ -122,10 +123,8 @@ class TrainReader(Reader):
                    cate = line[1].split()
                    res.append([hist, cate, line[2], line[3], float(line[4])])
        return res

    def generate_batch_from_trainfiles(self, files):
        data_set = self.base_read(files)
        random.shuffle(data_set)
        return self.batch_reader(data_set, self.batch_size, self.batch_size * 20)
@@ -13,6 +13,7 @@
# limitations under the License.

import math

import paddle.fluid as fluid

from paddlerec.core.utils import envs
...
@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import paddle.fluid as fluid
import math

+import paddle.fluid as fluid

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
@@ -22,32 +23,39 @@ from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)
    def wide_part(self, data):
        out = fluid.layers.fc(
            input=data,
            size=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormal(
                    loc=0.0, scale=1.0 / math.sqrt(data.shape[1])),
                regularizer=fluid.regularizer.L2DecayRegularizer(
                    regularization_coeff=1e-4)),
            act=None,
            name='wide')
        return out

    def fc(self, data, hidden_units, active, tag):
        output = fluid.layers.fc(
            input=data,
            size=hidden_units,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormal(
                    loc=0.0, scale=1.0 / math.sqrt(data.shape[1]))),
            act=active,
            name=tag)
        return output

    def deep_part(self, data, hidden1_units, hidden2_units, hidden3_units):
        l1 = self.fc(data, hidden1_units, 'relu', 'l1')
        l2 = self.fc(l1, hidden2_units, 'relu', 'l2')
        l3 = self.fc(l2, hidden3_units, 'relu', 'l3')
        return l3
    def train_net(self):
        wide_input = fluid.data(name='wide_input', shape=[None, 8], dtype='float32')
        deep_input = fluid.data(name='deep_input', shape=[None, 58], dtype='float32')
@@ -61,31 +69,33 @@ class Model(ModelBase):
        hidden3_units = envs.get_global_env("hyper_parameters.hidden3_units", 25, self._namespace)
        wide_output = self.wide_part(wide_input)
        deep_output = self.deep_part(deep_input, hidden1_units, hidden2_units, hidden3_units)

        wide_model = fluid.layers.fc(
            input=wide_output,
            size=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
            act=None,
            name='w_wide')

        deep_model = fluid.layers.fc(
            input=deep_output,
            size=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
            act=None,
            name='w_deep')

        prediction = fluid.layers.elementwise_add(wide_model, deep_model)
        pred = fluid.layers.sigmoid(fluid.layers.clip(prediction, min=-15.0, max=15.0), name="prediction")

        num_seqs = fluid.layers.create_tensor(dtype='int64')
        acc = fluid.layers.accuracy(input=pred, label=fluid.layers.cast(x=label, dtype='int64'), total=num_seqs)
        auc_var, batch_auc, auc_states = fluid.layers.auc(input=pred, label=fluid.layers.cast(x=label, dtype='int64'))

        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc
        self._metrics["ACC"] = acc

        cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=prediction, label=label)
        avg_cost = fluid.layers.mean(cost)
        self._cost = avg_cost
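Note the division of labor above: `pred` (sigmoid of clipped logits) feeds the metrics, while the loss takes the raw `prediction` logits, because `sigmoid_cross_entropy_with_logits` applies the sigmoid internally in a numerically stable form. A NumPy sketch of that equivalence:

```python
import numpy as np

def sce_with_logits(x, z):
    # stable formulation: max(x, 0) - x*z + log(1 + exp(-|x|))
    return np.maximum(x, 0) - x * z + np.log1p(np.exp(-np.abs(x)))

x = np.array([-3.0, 0.5, 4.0])   # logits
z = np.array([0.0, 1.0, 1.0])    # labels
p = 1.0 / (1.0 + np.exp(-x))
naive = -(z * np.log(p) + (1 - z) * np.log(1 - p))
print(np.allclose(sce_with_logits(x, z), naive))  # True, but the stable form never overflows
```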
@@ -95,4 +105,4 @@ class Model(ModelBase):
        return optimizer

    def infer_net(self, parameter_list):
        self.train_net()  # the wide&deep graph is built in train_net(); this class has no deepfm_net
\ No newline at end of file
@@ -11,15 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

-from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
try:
    import cPickle as pickle
except ImportError:
    import pickle

+from paddlerec.core.reader import Reader


class TrainReader(Reader):
    def init(self):
        pass
@@ -28,16 +30,17 @@ class TrainReader(Reader):
        line = line.strip().split(',')
        features = list(map(float, line))
        wide_feat = features[0:8]
        deep_feat = features[8:58 + 8]
        label = features[-1]
        return wide_feat, deep_feat, [label]
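For orientation, `_process_line` assumes each row carries 8 wide features, 58 deep features, and a trailing label; a toy row (values made up) parses as follows:

```python
sample = ",".join(str(i) for i in range(66)) + ",1"  # 66 hypothetical features + label
features = list(map(float, sample.strip().split(',')))
wide_feat, deep_feat, label = features[0:8], features[8:58 + 8], [features[-1]]
print(len(wide_feat), len(deep_feat), label)  # 8 58 [1.0]
```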
    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
        """

        def data_iter():
            wide_feat, deep_feat, label = self._process_line(line)
            yield [('wide_input', wide_feat), ('deep_input', deep_feat), ('label', label)]

        return data_iter
\ No newline at end of file
@@ -11,19 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

-from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
try:
    import cPickle as pickle
except ImportError:
    import pickle

+from paddlerec.core.reader import Reader


class TrainReader(Reader):
    def init(self):
        pass

    def _process_line(self, line):
        features = line.strip('\n').split('\t')
        feat_idx = []
@@ -33,11 +35,11 @@ class TrainReader(Reader):
            feat_value.append(1.0)
        label = [int(features[0])]
        return feat_idx, feat_value, label

    def generate_sample(self, line):
        def data_iter():
            feat_idx, feat_value, label = self._process_line(line)
            yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)]

        return data_iter
\ No newline at end of file
@@ -13,7 +13,6 @@
# limitations under the License.

import paddle.fluid as fluid
-import math

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
@@ -27,13 +26,13 @@ class Model(ModelBase):
        init_value_ = 0.1
        initer = fluid.initializer.TruncatedNormalInitializer(
            loc=0.0, scale=init_value_)

        is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
        sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
        sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)

        # ------------------------- network input --------------------------
        num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
        raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64')
        raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32')
@@ -52,16 +51,16 @@ class Model(ModelBase):
            feat_embeddings,
            [-1, num_field, sparse_feature_dim])  # None * num_field * embedding_size
        feat_embeddings = feat_embeddings * feat_value  # None * num_field * embedding_size

        # ------------------------- set _data_var --------------------------
        self._data_var.append(raw_feat_idx)
        self._data_var.append(raw_feat_value)
        self._data_var.append(self.label)
        if self._platform != "LINUX":
            self._data_loader = fluid.io.DataLoader.from_generator(
                feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)

        # -------------------- linear --------------------
        weights_linear = fluid.embedding(
@@ -79,7 +78,7 @@ class Model(ModelBase):
            default_initializer=fluid.initializer.ConstantInitializer(value=0))
        y_linear = fluid.layers.reduce_sum(
            (weights_linear * feat_value), 1) + b_linear

        # -------------------- CIN --------------------
        layer_sizes_cin = envs.get_global_env("hyper_parameters.layer_sizes_cin", None, self._namespace)
@@ -90,7 +89,7 @@ class Model(ModelBase):
        X_0 = fluid.layers.reshape(
            fluid.layers.transpose(Xs[0], [0, 2, 1]),
            [-1, sparse_feature_dim, num_field,
             1])  # None, embedding_size, num_field, 1
        X_k = fluid.layers.reshape(
            fluid.layers.transpose(Xs[-1], [0, 2, 1]),
            [-1, sparse_feature_dim, 1, last_s])  # None, embedding_size, 1, last_s
@@ -136,7 +135,7 @@ class Model(ModelBase):
        layer_sizes_dnn = envs.get_global_env("hyper_parameters.layer_sizes_dnn", None, self._namespace)
        act = envs.get_global_env("hyper_parameters.act", None, self._namespace)
        y_dnn = fluid.layers.reshape(feat_embeddings,
                                     [-1, num_field * sparse_feature_dim])
        for s in layer_sizes_dnn:
            y_dnn = fluid.layers.fc(input=y_dnn,
                                    size=s,
@@ -152,7 +151,7 @@ class Model(ModelBase):
        # ------------------- xDeepFM ------------------
        self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)

    def train_net(self):
        self.xdeepfm_net()
@@ -164,15 +163,15 @@ class Model(ModelBase):
        predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
        label_int = fluid.layers.cast(self.label, 'int64')
        auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
                                                     label=label_int,
                                                     slide_steps=0)
        self._metrics["AUC"] = auc_var
        self._metrics["BATCH_AUC"] = batch_auc_var

    def optimizer(self):
        learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
        optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
        return optimizer

    def infer_net(self, parameter_list):
        self.xdeepfm_net()
\ No newline at end of file
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "begin to download data"
...
@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-import numpy as np
-import io
import copy
import random

+import numpy as np

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
@@ -22,17 +24,17 @@ from paddlerec.core.utils import envs
class EvaluateReader(Reader):
    def init(self):
        self.batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")

        self.input = []
        self.length = None

    def base_read(self, files):
        res = []
        for f in files:
            with open(f, "r") as fin:
                for line in fin:
                    line = line.strip().split('\t')
                    res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
        return res

    def make_data(self, cur_batch, batch_size):
@@ -120,10 +122,11 @@ class EvaluateReader(Reader):
                else:
                    # Due to fixed batch_size, discard the remaining ins
                    return
                    # cur_batch = remain_data[i:]
                    # yield self.make_data(cur_batch, group_remain % batch_size)

        return _reader

    def generate_batch_from_trainfiles(self, files):
        self.input = self.base_read(files)
        self.length = len(self.input)
@@ -132,4 +135,5 @@ class EvaluateReader(Reader):
    def generate_sample(self, line):
        def data_iter():
            yield []

        return data_iter
@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import numpy as np
import math

+import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
@@ -25,19 +26,19 @@ class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)
        self.init_config()
    def init_config(self):
        self._fetch_interval = 1
        self.items_num, self.ins_num = self.config_read(
            envs.get_global_env("hyper_parameters.config_path", None, self._namespace))
        self.train_batch_size = envs.get_global_env("batch_size", None, "train.reader")
        self.evaluate_batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")
        self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
        self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps", None, self._namespace)

    def config_read(self, config_path=None):
        if config_path is None:
            raise ValueError("please set train.model.hyper_parameters.config_path at first")
        with open(config_path, "r") as fin:
            item_nums = int(fin.readline().strip())
            ins_nums = int(fin.readline().strip())
@@ -47,49 +48,49 @@ class Model(ModelBase):
        self.items = fluid.data(
            name="items",
            shape=[bs, -1],
            dtype="int64")  # [batch_size, uniq_max]
        self.seq_index = fluid.data(
            name="seq_index",
            shape=[bs, -1, 2],
            dtype="int32")  # [batch_size, seq_max, 2]
        self.last_index = fluid.data(
            name="last_index",
            shape=[bs, 2],
            dtype="int32")  # [batch_size, 2]
        self.adj_in = fluid.data(
            name="adj_in",
            shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        self.adj_out = fluid.data(
            name="adj_out",
            shape=[bs, -1, -1],
            dtype="float32")  # [batch_size, seq_max, seq_max]
        self.mask = fluid.data(
            name="mask",
            shape=[bs, -1, 1],
            dtype="float32")  # [batch_size, seq_max, 1]
        self.label = fluid.data(
            name="label",
            shape=[bs, 1],
            dtype="int64")  # [batch_size, 1]
        res = [self.items, self.seq_index, self.last_index, self.adj_in, self.adj_out, self.mask, self.label]
        return res

    def train_input(self):
        res = self.input(self.train_batch_size)
        self._data_var = res

        use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)

        if self._platform != "LINUX" or use_dataloader:
            self._data_loader = fluid.io.DataLoader.from_generator(
                feed_list=self._data_var, capacity=256, use_double_buffer=False, iterable=False)
    def net(self, items_num, hidden_size, step, bs):
        stdv = 1.0 / math.sqrt(hidden_size)

        def embedding_layer(input, table_name, emb_dim, initializer_instance=None):
            emb = fluid.embedding(
                input=input,
                size=[items_num, emb_dim],
@@ -97,10 +98,10 @@ class Model(ModelBase):
                    name=table_name,
                    initializer=initializer_instance),
            )
            return emb

        sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
        items_emb = embedding_layer(self.items, "emb", hidden_size, sparse_initializer)
        pre_state = items_emb
        for i in range(step):
            pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
@@ -113,7 +114,7 @@ class Model(ModelBase):
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]
            state_out = layers.fc(
                input=pre_state,
                name="state_out",
@@ -123,13 +124,13 @@ class Model(ModelBase):
                param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
                bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, uniq_max, h]

            state_adj_in = layers.matmul(self.adj_in, state_in)  # [batch_size, uniq_max, h]
            state_adj_out = layers.matmul(self.adj_out, state_out)  # [batch_size, uniq_max, h]

            gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)

            gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
            gru_fc = layers.fc(
                input=gru_input,
@@ -140,11 +141,11 @@ class Model(ModelBase):
                input=gru_fc,
                hidden=layers.reshape(x=pre_state, shape=[-1, hidden_size]),
                size=3 * hidden_size)

        final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
        seq = layers.gather_nd(final_state, self.seq_index)
        last = layers.gather_nd(final_state, self.last_index)

        seq_fc = layers.fc(
            input=seq,
            name="seq_fc",
@@ -154,7 +155,7 @@ class Model(ModelBase):
            num_flatten_dims=2,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, seq_max, h]
        last_fc = layers.fc(
            input=last,
            name="last_fc",
@@ -164,22 +165,22 @@ class Model(ModelBase):
            num_flatten_dims=1,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, h]

        seq_fc_t = layers.transpose(
            seq_fc, perm=[1, 0, 2])  # [seq_max, batch_size, h]
        add = layers.elementwise_add(
            seq_fc_t, last_fc)  # [seq_max, batch_size, h]
        b = layers.create_parameter(
            shape=[hidden_size],
            dtype='float32',
            default_initializer=fluid.initializer.Constant(value=0.0))  # [h]
        add = layers.elementwise_add(add, b)  # [seq_max, batch_size, h]

        add_sigmoid = layers.sigmoid(add)  # [seq_max, batch_size, h]
        add_sigmoid = layers.transpose(
            add_sigmoid, perm=[1, 0, 2])  # [batch_size, seq_max, h]

        weight = layers.fc(
            input=add_sigmoid,
            name="weight_fc",
@@ -189,13 +190,13 @@ class Model(ModelBase):
            bias_attr=False,
            param_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)))  # [batch_size, seq_max, 1]
        weight *= self.mask
        weight_mask = layers.elementwise_mul(seq, weight, axis=0)  # [batch_size, seq_max, h]
        global_attention = layers.reduce_sum(weight_mask, dim=1)  # [batch_size, h]

        final_attention = layers.concat(
            [global_attention, last], axis=1)  # [batch_size, 2*h]
        final_attention_fc = layers.fc(
            input=final_attention,
            name="final_attention_fc",
@@ -203,14 +204,14 @@ class Model(ModelBase):
            bias_attr=False,
            act=None,
            param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
                low=-stdv, high=stdv)))  # [batch_size, h]

        # all_vocab = layers.create_global_var(
        #     shape=[items_num - 1],
        #     value=0,
        #     dtype="int64",
        #     persistable=True,
        #     name="all_vocab")
        all_vocab = np.arange(1, items_num).reshape((-1)).astype('int32')
        all_vocab = fluid.layers.cast(x=fluid.layers.assign(all_vocab), dtype='int64')
@@ -220,13 +221,13 @@ class Model(ModelBase):
                name="emb",
                initializer=fluid.initializer.Uniform(
                    low=-stdv, high=stdv)),
            size=[items_num, hidden_size])  # [all_vocab, h]

        logits = layers.matmul(
            x=final_attention_fc, y=all_emb,
            transpose_y=True)  # [batch_size, all_vocab]
        softmax = layers.softmax_with_cross_entropy(
            logits=logits, label=self.label)  # [batch_size, 1]
        self.loss = layers.reduce_mean(softmax)  # [1]
        self.acc = layers.accuracy(input=logits, label=self.label, k=20)
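`layers.accuracy` with `k=20` is top-20 accuracy, which for this single-label next-item task is the usual Recall@20. A NumPy sketch of the same metric:

```python
import numpy as np

def recall_at_k(logits, labels, k=20):
    # fraction of sessions whose true next item ranks in the top-k scores
    topk = np.argsort(-logits, axis=1)[:, :k]
    return float(np.mean([label in row for label, row in zip(labels, topk)]))

rng = np.random.default_rng(0)
logits = rng.random((4, 100))           # toy scores over 100 items
labels = rng.integers(0, 100, size=4)   # toy ground-truth item ids
print(recall_at_k(logits, labels, k=20))
```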
@@ -249,7 +250,7 @@ class Model(ModelBase):
        decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None, self._namespace)
        decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None, self._namespace)
        l2 = envs.get_global_env("hyper_parameters.l2", None, self._namespace)

        optimizer = fluid.optimizer.Adam(
            learning_rate=fluid.layers.exponential_decay(
                learning_rate=learning_rate,
                decay_steps=decay_steps * step_per_epoch,
@@ -257,18 +258,18 @@ class Model(ModelBase):
            regularization=fluid.regularizer.L2DecayRegularizer(
                regularization_coeff=l2))
        return optimizer

    def infer_input(self):
        self._reader_namespace = "evaluate.reader"
        res = self.input(self.evaluate_batch_size)
        self._infer_data_var = res

        self._infer_data_loader = fluid.io.DataLoader.from_generator(
            feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)

    def infer_net(self):
        self.infer_input()
        self.net(self.items_num, self.hidden_size, self.step, self.evaluate_batch_size)
        self._infer_results['acc'] = self.acc
        self._infer_results['loss'] = self.loss
@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

-import numpy as np
-import io
import copy
import random

+import numpy as np

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
@@ -22,17 +24,17 @@ from paddlerec.core.utils import envs
class TrainReader(Reader):
    def init(self):
        self.batch_size = envs.get_global_env("batch_size", None, "train.reader")

        self.input = []
        self.length = None

    def base_read(self, files):
        res = []
        for f in files:
            with open(f, "r") as fin:
                for line in fin:
                    line = line.strip().split('\t')
                    res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
        return res

    def make_data(self, cur_batch, batch_size):
@@ -120,10 +122,11 @@ class TrainReader(Reader):
                else:
                    # Due to fixed batch_size, discard the remaining ins
                    return
                    # cur_batch = remain_data[i:]
                    # yield self.make_data(cur_batch, group_remain % batch_size)

        return _reader

    def generate_batch_from_trainfiles(self, files):
        self.input = self.base_read(files)
        self.length = len(self.input)
@@ -132,4 +135,5 @@ class TrainReader(Reader):
    def generate_sample(self, line):
        def data_iter():
            yield []

        return data_iter
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import math
import paddle.fluid as fluid

from paddlerec.core.utils import envs
@@ -87,10 +86,8 @@ class Model(ModelBase):
        self._metrics["cost"] = avg_cost
        self._metrics["acc"] = acc

    def train_net(self):
        self.all_vocab_network()

    def infer_net(self):
        self.all_vocab_network(is_infer=True)
@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs


class EvaluateReader(Reader):
...
@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs


class TrainReader(Reader):
...
@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import math
import paddle.fluid as fluid
-from paddlerec.core.utils import envs
-from paddlerec.core.model import Model as ModelBase
import paddle.fluid.layers.tensor as tensor
-import paddle.fluid.layers.io as io
import paddle.fluid.layers.control_flow as cf

+from paddlerec.core.utils import envs
+from paddlerec.core.model import Model as ModelBase


class BowEncoder(object):
@@ -54,6 +51,7 @@ class GrnnEncoder(object):
            bias_attr=self.param_name + ".bias")
        return fluid.layers.sequence_pool(input=gru_h, pool_type='max')


class PairwiseHingeLoss(object):
    def __init__(self, margin=0.8):
        self.margin = margin
@@ -70,6 +68,7 @@ class PairwiseHingeLoss(object):
            loss_part2)
        return loss_part3


class Model(ModelBase):
    def __init__(self, config):
        ModelBase.__init__(self, config)
@@ -80,7 +79,6 @@ class Model(ModelBase):
        return correct

    def train(self):
        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
        emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
        hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
@@ -124,16 +122,14 @@ class Model(ModelBase):
        hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg)
        avg_cost = fluid.layers.mean(hinge_loss)
        correct = self.get_correct(cos_neg, cos_pos)

        self._cost = avg_cost
        self._metrics["correct"] = correct
        self._metrics["hinge_loss"] = hinge_loss

    def train_net(self):
        self.train()

    def infer(self):
        vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
        emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
@@ -146,7 +142,7 @@ class Model(ModelBase):
        pos_label = fluid.data(name="pos_label", shape=[None, 1], dtype="int64")
        self._infer_data_var = [user_data, all_item_data, pos_label]
        self._infer_data_loader = fluid.io.DataLoader.from_generator(
            feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)

        user_emb = fluid.embedding(
            input=user_data, size=[vocab_size, emb_dim], param_attr="emb.item")
@@ -173,6 +169,5 @@ class Model(ModelBase):
        self._infer_results['recall20'] = acc

    def infer_net(self):
        self.infer()
@@ -11,19 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

-import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
-import random
+import numpy as np


class EvaluateReader(Reader):
    def init(self):
        self.vocab_size = envs.get_global_env("vocab_size", 10, "train.model.hyper_parameters")

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
@@ -39,6 +39,6 @@ class EvaluateReader(Reader):
            src = conv_ids[:boundary]
            pos_tgt = [conv_ids[boundary]]
            feature_name = ["user", "all_item", "p_item"]
            yield zip(feature_name, [src] + [np.arange(self.vocab_size).astype("int64").tolist()] + [pos_tgt])

        return reader
@@ -11,12 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

-from paddlerec.core.reader import Reader
-from paddlerec.core.utils import envs
import random

+from paddlerec.core.reader import Reader


class TrainReader(Reader):
    def init(self):
@@ -25,7 +26,6 @@ class TrainReader(Reader):
    def sample_neg_from_seq(self, seq):
        return seq[random.randint(0, len(seq) - 1)]

    def generate_sample(self, line):
        """
        Read the data line by line and process it as a dictionary
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import math
import numpy as np
import paddle.fluid as fluid
...
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# download train_data
mkdir raw_data
wget --no-check-certificate https://paddlerec.bj.bcebos.com/word2vec/1-billion-word-language-modeling-benchmark-r13output.tar
...
@@ -13,13 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.

-import io
-import math
import os
import random
import re
import six
import argparse

+import io
+import math

prog = re.compile("[^a-z ]", flags=0)
@@ -73,7 +75,7 @@ def parse_args():
def text_strip(text):
    # English Preprocess Rule
    return prog.sub("", text.lower())
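Since `prog` keeps only lowercase letters and spaces, `text_strip` both lowercases and strips punctuation/digits in one pass:

```python
import re

prog = re.compile("[^a-z ]", flags=0)
print(prog.sub("", "Hello, World! 123".lower()))  # -> 'hello world ' (punctuation and digits dropped)
```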
@@ -115,7 +117,7 @@ def filter_corpus(args):
    word_all_count = 0
    id_counts = []
    word_id = 0

    # read dict
    with io.open(args.dict_path, 'r', encoding='utf-8') as f:
        for line in f:
            word, count = line.split()[0], int(line.split()[1])
@@ -125,13 +127,13 @@ def filter_corpus(args):
            id_counts.append(count)
            word_all_count += count

    # write word2id file
    print("write word2id file to : " + args.dict_path + "_word_to_id_")
    with io.open(
            args.dict_path + "_word_to_id_", 'w+', encoding='utf-8') as fid:
        for k, v in word_to_id_.items():
            fid.write(k + " " + str(v) + '\n')

    # filter corpus and convert id
    if not os.path.exists(args.output_corpus_dir):
        os.makedirs(args.output_corpus_dir)
    for file in os.listdir(args.input_corpus_dir):
@@ -152,9 +154,9 @@ def filter_corpus(args):
                    count_w = id_counts[idx]
                    corpus_size = word_all_count
                    keep_prob = (
                        math.sqrt(count_w /
                                  (args.downsample * corpus_size)) + 1
                    ) * (args.downsample * corpus_size) / count_w
                    r_value = random.random()
                    if r_value > keep_prob:
                        continue
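This is the standard word2vec subsampling rule: a word with corpus frequency f survives with probability (sqrt(f/t) + 1) * t/f, where t is `args.downsample`. A quick check with made-up counts:

```python
import math

def keep_prob(count_w, corpus_size, downsample=1e-3):
    # probability of keeping one occurrence of a word seen count_w times
    t = downsample * corpus_size
    return (math.sqrt(count_w / t) + 1) * t / count_w

# a word covering 1% of a 10M-token corpus survives ~42% of the time
print(keep_prob(100_000, 10_000_000))  # ~0.416
```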
...@@ -200,7 +202,7 @@ def build_dict(args): ...@@ -200,7 +202,7 @@ def build_dict(args):
for item in item_to_remove: for item in item_to_remove:
unk_sum += word_count[item] unk_sum += word_count[item]
del word_count[item] del word_count[item]
#sort by count # sort by count
word_count[native_to_unicode('<UNK>')] = unk_sum word_count[native_to_unicode('<UNK>')] = unk_sum
word_count = sorted( word_count = sorted(
word_count.items(), key=lambda word_count: -word_count[1]) word_count.items(), key=lambda word_count: -word_count[1])
@@ -222,17 +224,18 @@ def data_split(args):
    for file_ in files:
        with open(os.path.join(raw_data_dir, file_), 'r') as f:
            contents.extend(f.readlines())

    num = int(args.file_nums)
    # floor division: the slice bounds below must stay integers under Python 3
    lines_per_file = len(contents) // num
    print("contents: ", str(len(contents)))
    print("lines_per_file: ", str(lines_per_file))

    for i in range(1, num + 1):
        with open(os.path.join(new_data_dir, "part_" + str(i)), 'w') as fout:
            data = contents[(i - 1) * lines_per_file:
                            min(i * lines_per_file, len(contents))]
            for line in data:
                fout.write(line)


if __name__ == "__main__":
    args = parse_args()
...
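One caveat with the split above: with floor division, the trailing len(contents) % num lines fall past part_<num> and are silently dropped. A sketch that spreads the remainder across the first chunks instead:

def split_evenly(lines, num):
    base, extra = divmod(len(lines), num)
    parts, start = [], 0
    for i in range(num):
        end = start + base + (1 if i < extra else 0)  # first `extra` chunks get one more line
        parts.append(lines[start:end])
        start = end
    return parts

print([len(p) for p in split_evenly(list(range(10)), 3)])  # [4, 3, 3]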
@@ -11,16 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io

import six

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs


class EvaluateReader(Reader):
    def init(self):
        dict_path = envs.get_global_env("word_id_dict_path", None, "evaluate.reader")
        self.word_to_id = dict()
        self.id_to_word = dict()
        with io.open(dict_path, 'r', encoding='utf-8') as f:
@@ -46,19 +48,16 @@ class EvaluateReader(Reader):
        if isinstance(s, str):
            return True
        return False

    def _to_unicode(self, s, ignore_errors=False):
        if self._is_unicode(s):
            return s
        error_mode = "ignore" if ignore_errors else "strict"
        return s.decode("utf-8", errors=error_mode)

    def strip_lines(self, line, vocab):
        return self._replace_oov(vocab, self.native_to_unicode(line))

    def _replace_oov(self, original_vocab, line):
        """Replace out-of-vocab words with "<UNK>".
        This maintains compatibility with published results.
@@ -76,5 +75,7 @@ class EvaluateReader(Reader):
        def reader():
            features = self.strip_lines(line.lower(), self.word_to_id)
            features = features.split()
            yield [('analogy_a', [self.word_to_id[features[0]]]),
                   ('analogy_b', [self.word_to_id[features[1]]]),
                   ('analogy_c', [self.word_to_id[features[2]]]),
                   ('analogy_d', [self.word_to_id[features[3]]])]

        return reader
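Each yielded sample carries the four word ids of one analogy line (a is to b as c is to d). Downstream, the usual word2vec evaluation (a sketch, not this model's actual graph) predicts d by nearest neighbour to emb(b) - emb(a) + emb(c):

import numpy as np

def predict_analogy(emb, a, b, c):
    # emb: [vocab_size, dim] embedding table; a, b, c: word ids
    query = emb[b] - emb[a] + emb[c]
    # cosine similarity of the query against every row of the table
    sims = emb.dot(query) / (
        np.linalg.norm(emb, axis=1) * np.linalg.norm(query) + 1e-8)
    sims[[a, b, c]] = -np.inf  # the question words themselves don't count
    return int(np.argmax(sims))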
@@ -11,8 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import io

import numpy as np

from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs

@@ -37,7 +40,7 @@ class NumpyRandomInt(object):
class TrainReader(Reader):
    def init(self):
        dict_path = envs.get_global_env("word_count_dict_path", None, "train.reader")
        self.window_size = envs.get_global_env("hyper_parameters.window_size", None, "train.model")
        self.neg_num = envs.get_global_env("hyper_parameters.neg_num", None, "train.model")
        self.with_shuffle_batch = envs.get_global_env("hyper_parameters.with_shuffle_batch", None, "train.model")
@@ -72,7 +75,7 @@ class TrainReader(Reader):
        start_point = 0
        end_point = idx + target_window
        targets = words[start_point:idx] + words[idx + 1:end_point + 1]
        return targets
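This hunk is the tail of the context-window helper: the words up to target_window positions around idx, minus the centre word itself, become the positive contexts. A self-contained sketch (clamping start_point at idx - target_window is an assumption about the lines elided above the hunk):

def get_context_words(words, idx, target_window):
    start_point = idx - target_window if idx - target_window > 0 else 0
    end_point = idx + target_window
    return words[start_point:idx] + words[idx + 1:end_point + 1]

print(get_context_words(['w0', 'w1', 'w2', 'w3', 'w4', 'w5'], idx=2, target_window=2))
# ['w0', 'w1', 'w3', 'w4']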
    def generate_sample(self, line):
        def reader():
@@ -84,7 +87,7 @@ class TrainReader(Reader):
            output = [('input_word', [int(target_id)]), ('true_label', [int(context_id)])]
            if not self.with_shuffle_batch:
                neg_array = self.cs.searchsorted(np.random.sample(self.neg_num))
                output += [('neg_label', [int(str(i)) for i in neg_array])]
            yield output

        return reader
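About the negative sampling above: self.cs is presumably the cumulative sum of a normalized unigram distribution built from word_count_dict_path, so searchsorted maps uniform random numbers to word ids with probability proportional to (smoothed) frequency. A sketch of the mechanism (the 0.75 exponent is word2vec's usual smoothing, assumed here):

import numpy as np

counts = np.array([10.0, 5.0, 1.0])   # per-word corpus counts
probs = counts ** 0.75                # frequency smoothing (assumption)
cs = np.cumsum(probs / probs.sum())   # cumulative distribution; cs[-1] == 1.0

neg_num = 4
neg_array = cs.searchsorted(np.random.sample(neg_num))
print(neg_array)  # e.g. [0 1 0 2]; frequent ids are drawn more often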
@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""

import paddle.fluid as fluid
import math

from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
@@ -134,7 +134,7 @@ class Model(ModelBase):
        sample_nodes_emb = [
            fluid.layers.reshape(sample_nodes_emb[i],
                                 [-1, self.neg_sampling_list[i] +
                                  self.output_positive, self.node_emb_size]
                                 ) for i in range(self.max_layers)
        ]
@@ -229,7 +229,7 @@ class Model(ModelBase):
                act=self.act,
                param_attr=fluid.ParamAttr(
                    name="trans.layer_fc.weight." + str(i)),
                bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias." + str(i)),
            ) for i in range(self.max_layers)
        ]
@@ -268,8 +268,8 @@ class Model(ModelBase):
                num_flatten_dims=2,
                act=self.act,
                param_attr=fluid.ParamAttr(
                    name="cls.concat_fc.weight." + str(i)),
                bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(i))
            ) for i in range(self.max_layers)
        ]
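The hunks above only adjust spacing, but the naming pattern they touch is doing real work: in Fluid 1.x, layers given the same ParamAttr name share one underlying parameter, which is how the per-layer weights built here ("trans.layer_fc.weight." + str(i)) can be addressed again by the single-layer helpers later in this diff. A minimal sketch of that sharing, under the Fluid 1.x API:

import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[1, 8], dtype="float32")
fc_a = fluid.layers.fc(
    input=x, size=16, num_flatten_dims=2, act="tanh",
    param_attr=fluid.ParamAttr(name="trans.layer_fc.weight.0"),
    bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias.0"))
# same ParamAttr names -> fc_b reuses fc_a's weight and bias tensors
fc_b = fluid.layers.fc(
    input=x, size=16, num_flatten_dims=2, act="tanh",
    param_attr=fluid.ParamAttr(name="trans.layer_fc.weight.0"),
    bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias.0"))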
@@ -348,7 +348,7 @@ class Model(ModelBase):
            current_layer_node_num = self.first_layer_node.shape[1]
        else:
            current_layer_node_num = current_layer_node.shape[1] * \
                current_layer_node.shape[2]
        current_layer_node = fluid.layers.reshape(
            current_layer_node, [-1, current_layer_node_num])
@@ -458,7 +458,7 @@ class Model(ModelBase):
            param_attr=fluid.ParamAttr(
                name="trans.layer_fc.weight." + str(layer_idx)),
            bias_attr=fluid.ParamAttr(
                name="trans.layer_fc.bias." + str(layer_idx)),
        )
        return input_layer_fc_out
@@ -479,6 +479,6 @@ class Model(ModelBase):
            num_flatten_dims=2,
            act=self.act,
            param_attr=fluid.ParamAttr(
                name="cls.concat_fc.weight." + str(layer_idx)),
            bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(layer_idx)))
        return hidden_states_fc
@@ -28,6 +28,7 @@ class EvaluateReader(Reader):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
...
@@ -28,6 +28,7 @@ class TrainReader(Reader):
        """
        Read the data line by line and process it as a dictionary
        """

        def reader():
            """
            This function needs to be implemented by the user, based on data format
...
@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import subprocess
import tempfile

import yaml

from paddlerec.core.factory import TrainerFactory
...
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
setup for paddle-rec. setup for paddle-rec.
""" """
import os import os
from setuptools import setup, find_packages from setuptools import setup, find_packages
import tempfile
import shutil import shutil
import tempfile
requires = [ requires = [
"paddlepaddle == 1.7.2", "paddlepaddle == 1.7.2",
...@@ -19,7 +36,7 @@ about["__author__"] = "paddle-dev" ...@@ -19,7 +36,7 @@ about["__author__"] = "paddle-dev"
about["__author_email__"] = "paddle-dev@baidu.com" about["__author_email__"] = "paddle-dev@baidu.com"
about["__url__"] = "https://github.com/PaddlePaddle/PaddleRec" about["__url__"] = "https://github.com/PaddlePaddle/PaddleRec"
readme = "..." readme = ""
def run_cmd(command): def run_cmd(command):
......
@@ -12,15 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import functools
import os
import platform
import shutil
import sys
import tarfile
import time
import zipfile

import requests

lasttime = time.time()
FLUSH_INTERVAL = 0.1
...