提交 76471fca 编写于 作者: F frankwhzhang

Merge branch 'develop' of http://gitlab.baidu.com/tangwei12/paddlerec into develop

#!/bin/bash #!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################### ###################################################
# Usage: submit.sh # Usage: submit.sh
......
...@@ -15,10 +15,9 @@ ...@@ -15,10 +15,9 @@
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import subprocess
import sys
import os
import copy import copy
import os
import subprocess
from paddlerec.core.engine.engine import Engine from paddlerec.core.engine.engine import Engine
from paddlerec.core.factory import TrainerFactory from paddlerec.core.factory import TrainerFactory
......
...@@ -29,4 +29,3 @@ class Engine: ...@@ -29,4 +29,3 @@ class Engine:
@abc.abstractmethod @abc.abstractmethod
def run(self): def run(self):
pass pass
...@@ -14,10 +14,11 @@ ...@@ -14,10 +14,11 @@
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import subprocess
import sys
import os
import copy import copy
import os
import sys
import subprocess
from paddlerec.core.engine.engine import Engine from paddlerec.core.engine.engine import Engine
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
......
...@@ -14,10 +14,11 @@ ...@@ -14,10 +14,11 @@
from __future__ import print_function from __future__ import print_function
from __future__ import unicode_literals from __future__ import unicode_literals
import subprocess
import sys
import os
import copy import copy
import os
import sys
import subprocess
from paddlerec.core.engine.engine import Engine from paddlerec.core.engine.engine import Engine
......
...@@ -25,17 +25,8 @@ class Layer(object): ...@@ -25,17 +25,8 @@ class Layer(object):
""" """
pass pass
def generate(self, mode, param):
"""R
"""
if mode == 'fluid':
return self.generate_fluid(param)
print('unsupport this mode: ' + mode)
return None, None
@abc.abstractmethod @abc.abstractmethod
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
pass pass
...@@ -53,7 +53,7 @@ class Metric(object): ...@@ -53,7 +53,7 @@ class Metric(object):
pass pass
@abc.abstractmethod @abc.abstractmethod
def get_result_to_string(self): def __str__(self):
""" """
Return: Return:
result(string) : calculate result with string format, for output result(string) : calculate result with string format, for output
......
...@@ -13,8 +13,10 @@ ...@@ -13,8 +13,10 @@
# limitations under the License. # limitations under the License.
import math import math
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.metric import Metric from paddlerec.core.metric import Metric
...@@ -198,7 +200,7 @@ class AUCMetric(Metric): ...@@ -198,7 +200,7 @@ class AUCMetric(Metric):
""" """ """ """
return self._result return self._result
def get_result_to_string(self): def __str__(self):
""" """ """ """
result = self.get_result() result = self.get_result()
result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \ result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
......
...@@ -47,7 +47,7 @@ class Model(object): ...@@ -47,7 +47,7 @@ class Model(object):
def get_infer_results(self): def get_infer_results(self):
return self._infer_results return self._infer_results
def get_cost_op(self): def get_avg_cost(self):
"""R """R
""" """
return self._cost return self._cost
......
...@@ -12,10 +12,11 @@ ...@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import yaml
import copy import copy
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
import yaml
from paddlerec.core.model import Model from paddlerec.core.model import Model
from paddlerec.core.utils import table from paddlerec.core.utils import table
......
...@@ -13,10 +13,11 @@ ...@@ -13,10 +13,11 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.layer import Layer from paddlerec.core.layer import Layer
class EmbeddingInputLayer(Layer): class EmbeddingFuseLayer(Layer):
"""R """R
""" """
...@@ -31,7 +32,7 @@ class EmbeddingInputLayer(Layer): ...@@ -31,7 +32,7 @@ class EmbeddingInputLayer(Layer):
self._emb_dim = self._mf_dim + 3 # append show ctr lr self._emb_dim = self._mf_dim + 3 # append show ctr lr
self._emb_layers = [] self._emb_layers = []
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
show_clk = fluid.layers.concat( show_clk = fluid.layers.concat(
...@@ -63,7 +64,7 @@ class LabelInputLayer(Layer): ...@@ -63,7 +64,7 @@ class LabelInputLayer(Layer):
self._data_type = config.get('data_type', "int64") self._data_type = config.get('data_type', "int64")
self._label_idx = config['label_idx'] self._label_idx = config['label_idx']
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \ label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
...@@ -85,7 +86,7 @@ class TagInputLayer(Layer): ...@@ -85,7 +86,7 @@ class TagInputLayer(Layer):
self._dim = config.get('dim', 1) self._dim = config.get('dim', 1)
self._data_type = config['data_type'] self._data_type = config['data_type']
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \ output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
...@@ -107,7 +108,7 @@ class ParamLayer(Layer): ...@@ -107,7 +108,7 @@ class ParamLayer(Layer):
self._data_type = config.get('data_type', 'float32') self._data_type = config.get('data_type', 'float32')
self._config = config self._config = config
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}} return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}}
...@@ -125,7 +126,7 @@ class SummaryLayer(Layer): ...@@ -125,7 +126,7 @@ class SummaryLayer(Layer):
self._data_type = config.get('data_type', 'float32') self._data_type = config.get('data_type', 'float32')
self._config = config self._config = config
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}} return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}}
...@@ -143,7 +144,7 @@ class NormalizetionLayer(Layer): ...@@ -143,7 +144,7 @@ class NormalizetionLayer(Layer):
self._summary = config['summary'] self._summary = config['summary']
self._table_id = config.get('table_id', -1) self._table_id = config.get('table_id', -1)
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
input_layer = param['layer'][self._input[0]] input_layer = param['layer'][self._input[0]]
...@@ -158,7 +159,7 @@ class NormalizetionLayer(Layer): ...@@ -158,7 +159,7 @@ class NormalizetionLayer(Layer):
'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}} 'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}}
class NeuralLayer(Layer): class FCLayer(Layer):
"""R """R
""" """
...@@ -171,7 +172,7 @@ class NeuralLayer(Layer): ...@@ -171,7 +172,7 @@ class NeuralLayer(Layer):
self._bias = config.get('bias', True) self._bias = config.get('bias', True)
self._act_func = config.get('act_func', None) self._act_func = config.get('act_func', None)
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
param_layer = param['layer'][self._param] param_layer = param['layer'][self._param]
...@@ -199,7 +200,7 @@ class NeuralLayer(Layer): ...@@ -199,7 +200,7 @@ class NeuralLayer(Layer):
'table_id': param_layer.get('table_id', -1)}} 'table_id': param_layer.get('table_id', -1)}}
class SigmoidLossLayer(Layer): class LogLossLayer(Layer):
"""R """R
""" """
...@@ -230,7 +231,7 @@ class SigmoidLossLayer(Layer): ...@@ -230,7 +231,7 @@ class SigmoidLossLayer(Layer):
} }
} }
def generate_fluid(self, param): def generate(self, param):
"""R """R
""" """
input_layer = param['layer'][self._input[0]] input_layer = param['layer'][self._input[0]]
......
...@@ -12,14 +12,14 @@ ...@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os
import sys
import abc import abc
import os
import time import time
import sys
import yaml import yaml
from paddle import fluid from paddle import fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Trainer implementations.
↗ (single/cluster) CtrTrainer
Trainer
↗ (for single training) SingleTrainer/TDMSingleTrainer
↘ TranspileTrainer → (for cluster training) ClusterTrainer/TDMClusterTrainer
↘ (for online learning training) OnlineLearningTrainer
"""
...@@ -25,7 +25,6 @@ import paddle.fluid as fluid ...@@ -25,7 +25,6 @@ import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
...@@ -83,7 +82,7 @@ class ClusterTrainer(TranspileTrainer): ...@@ -83,7 +82,7 @@ class ClusterTrainer(TranspileTrainer):
strategy = self.build_strategy() strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_cost_op()) optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server(): if fleet.is_server():
context['status'] = 'server_pass' context['status'] = 'server_pass'
...@@ -115,7 +114,7 @@ class ClusterTrainer(TranspileTrainer): ...@@ -115,7 +114,7 @@ class ClusterTrainer(TranspileTrainer):
program = fluid.compiler.CompiledProgram( program = fluid.compiler.CompiledProgram(
fleet.main_program).with_data_parallel( fleet.main_program).with_data_parallel(
loss_name=self.model.get_cost_op().name, loss_name=self.model.get_avg_cost().name,
build_strategy=self.strategy.get_build_strategy(), build_strategy=self.strategy.get_build_strategy(),
exec_strategy=self.strategy.get_execute_strategy()) exec_strategy=self.strategy.get_execute_strategy())
......
...@@ -11,9 +11,10 @@ ...@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os import os
import numpy as np
import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
...@@ -22,7 +23,7 @@ from paddlerec.core.utils import envs ...@@ -22,7 +23,7 @@ from paddlerec.core.utils import envs
from paddlerec.core.trainer import Trainer from paddlerec.core.trainer import Trainer
class CtrPaddleTrainer(Trainer): class CtrTrainer(Trainer):
"""R """R
""" """
...@@ -87,7 +88,7 @@ class CtrPaddleTrainer(Trainer): ...@@ -87,7 +88,7 @@ class CtrPaddleTrainer(Trainer):
optimizer = self.model.optimizer() optimizer = self.model.optimizer()
optimizer = fleet.distributed_optimizer(optimizer, strategy={"use_cvm": False}) optimizer = fleet.distributed_optimizer(optimizer, strategy={"use_cvm": False})
optimizer.minimize(self.model.get_cost_op()) optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server(): if fleet.is_server():
context['status'] = 'server_pass' context['status'] = 'server_pass'
......
...@@ -13,12 +13,12 @@ ...@@ -13,12 +13,12 @@
# limitations under the License. # limitations under the License.
import datetime
import json
import sys import sys
import time import time
import json
import datetime
import numpy as np
import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
...@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"): ...@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"):
return wroker_numric_opt(value, env, "max") return wroker_numric_opt(value, env, "max")
class CtrPaddleTrainer(Trainer): class CtrTrainer(Trainer):
"""R """R
""" """
...@@ -129,7 +129,7 @@ class CtrPaddleTrainer(Trainer): ...@@ -129,7 +129,7 @@ class CtrPaddleTrainer(Trainer):
model = self._exector_context[executor['name']]['model'] model = self._exector_context[executor['name']]['model']
self._metrics.update(model.get_metrics()) self._metrics.update(model.get_metrics())
runnnable_scope.append(scope) runnnable_scope.append(scope)
runnnable_cost_op.append(model.get_cost_op()) runnnable_cost_op.append(model.get_avg_cost())
for var in model._data_var: for var in model._data_var:
if var.name in data_var_name_dict: if var.name in data_var_name_dict:
continue continue
...@@ -146,7 +146,7 @@ class CtrPaddleTrainer(Trainer): ...@@ -146,7 +146,7 @@ class CtrPaddleTrainer(Trainer):
model = self._exector_context[executor['name']]['model'] model = self._exector_context[executor['name']]['model']
program = model._build_param['model']['train_program'] program = model._build_param['model']['train_program']
if not executor['is_update_sparse']: if not executor['is_update_sparse']:
program._fleet_opt["program_configs"][str(id(model.get_cost_op().block.program))]["push_sparse"] = [] program._fleet_opt["program_configs"][str(id(model.get_avg_cost().block.program))]["push_sparse"] = []
if 'train_thread_num' not in executor: if 'train_thread_num' not in executor:
executor['train_thread_num'] = self.global_config['train_thread_num'] executor['train_thread_num'] = self.global_config['train_thread_num']
with fluid.scope_guard(scope): with fluid.scope_guard(scope):
......
...@@ -18,9 +18,9 @@ Training use fluid with one node only. ...@@ -18,9 +18,9 @@ Training use fluid with one node only.
from __future__ import print_function from __future__ import print_function
import datetime
import os import os
import time import time
import datetime
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
...@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs ...@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
class ClusterTrainer(TranspileTrainer): class OnlineLearningTrainer(TranspileTrainer):
def processor_register(self): def processor_register(self):
role = PaddleCloudRoleMaker() role = PaddleCloudRoleMaker()
fleet.init(role) fleet.init(role)
...@@ -78,7 +78,7 @@ class ClusterTrainer(TranspileTrainer): ...@@ -78,7 +78,7 @@ class ClusterTrainer(TranspileTrainer):
optimizer = self.model.optimizer() optimizer = self.model.optimizer()
strategy = self.build_strategy() strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy) optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_cost_op()) optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server(): if fleet.is_server():
context['status'] = 'server_pass' context['status'] = 'server_pass'
......
...@@ -17,14 +17,14 @@ Training use fluid with one node only. ...@@ -17,14 +17,14 @@ Training use fluid with one node only.
""" """
from __future__ import print_function from __future__ import print_function
import logging
import time import time
import logging
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid") logger = logging.getLogger("fluid")
...@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer): ...@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer):
self.regist_context_processor('uninit', self.instance) self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init) self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup) self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader": if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None,
"train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train) self.regist_context_processor('train_pass', self.dataset_train)
else: else:
self.regist_context_processor('train_pass', self.dataloader_train) self.regist_context_processor('train_pass', self.dataloader_train)
...@@ -47,7 +48,7 @@ class SingleTrainer(TranspileTrainer): ...@@ -47,7 +48,7 @@ class SingleTrainer(TranspileTrainer):
def init(self, context): def init(self, context):
self.model.train_net() self.model.train_net()
optimizer = self.model.optimizer() optimizer = self.model.optimizer()
optimizer.minimize((self.model.get_cost_op())) optimizer.minimize((self.model.get_avg_cost()))
self.fetch_vars = [] self.fetch_vars = []
self.fetch_alias = [] self.fetch_alias = []
...@@ -74,7 +75,7 @@ class SingleTrainer(TranspileTrainer): ...@@ -74,7 +75,7 @@ class SingleTrainer(TranspileTrainer):
program = fluid.compiler.CompiledProgram( program = fluid.compiler.CompiledProgram(
fluid.default_main_program()).with_data_parallel( fluid.default_main_program()).with_data_parallel(
loss_name=self.model.get_cost_op().name) loss_name=self.model.get_avg_cost().name)
metrics_varnames = [] metrics_varnames = []
metrics_format = [] metrics_format = []
...@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer): ...@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer):
fetch_info=self.fetch_alias, fetch_info=self.fetch_alias,
print_period=self.fetch_period) print_period=self.fetch_period)
end_time = time.time() end_time = time.time()
times = end_time-begin_time times = end_time - begin_time
print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins/times)) print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins / times))
self.save(i, "train", is_fleet=False) self.save(i, "train", is_fleet=False)
context['status'] = 'infer_pass' context['status'] = 'infer_pass'
......
...@@ -17,17 +17,16 @@ Training use fluid with one node only. ...@@ -17,17 +17,16 @@ Training use fluid with one node only.
""" """
from __future__ import print_function from __future__ import print_function
import logging import logging
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.trainers.cluster_trainer import ClusterTrainer from paddlerec.core.trainers.cluster_trainer import ClusterTrainer
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid") logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO) logger.setLevel(logging.INFO)
......
...@@ -18,12 +18,11 @@ Training use fluid with one node only. ...@@ -18,12 +18,11 @@ Training use fluid with one node only.
from __future__ import print_function from __future__ import print_function
import logging import logging
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer import numpy as np
import paddle.fluid as fluid
from paddlerec.core.trainers.single_trainer import SingleTrainer from paddlerec.core.trainers.single_trainer import SingleTrainer
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s") logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid") logger = logging.getLogger("fluid")
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
from __future__ import print_function from __future__ import print_function
import os import os
import sys
from paddlerec.core.utils.envs import lazy_instance_by_fliename from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import get_global_env from paddlerec.core.utils.envs import get_global_env
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
# limitations under the License. # limitations under the License.
import abc import abc
import time
import datetime import datetime
import time
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs ...@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs
from paddlerec.core.utils import util as util from paddlerec.core.utils import util as util
class Dataset(object): class DatasetHolder(object):
""" """
Dataset Base Dataset Base
""" """
...@@ -62,7 +62,7 @@ class Dataset(object): ...@@ -62,7 +62,7 @@ class Dataset(object):
pass pass
class TimeSplitDataset(Dataset): class TimeSplitDatasetHolder(DatasetHolder):
""" """
Dataset with time split dir. root_path/$DAY/$HOUR Dataset with time split dir. root_path/$DAY/$HOUR
""" """
...@@ -142,16 +142,6 @@ class TimeSplitDataset(Dataset): ...@@ -142,16 +142,6 @@ class TimeSplitDataset(Dataset):
data_time = data_time + datetime.timedelta(minutes=self._split_interval) data_time = data_time + datetime.timedelta(minutes=self._split_interval)
return data_file_list return data_file_list
class FluidTimeSplitDataset(TimeSplitDataset):
"""
A Dataset with time split for PaddleFluid
"""
def __init__(self, config):
""" """
TimeSplitDataset.__init__(self, config)
def _alloc_dataset(self, file_list): def _alloc_dataset(self, file_list):
""" """ """ """
dataset = fluid.DatasetFactory().create_dataset(self._config['dataset_type']) dataset = fluid.DatasetFactory().create_dataset(self._config['dataset_type'])
......
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import sys import sys
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import os from contextlib import closing
import copy import copy
import sys import os
import socket import socket
from contextlib import closing import sys
global_envs = {} global_envs = {}
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import os import os
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
...@@ -94,11 +95,12 @@ class FileHandler(object): ...@@ -94,11 +95,12 @@ class FileHandler(object):
""" """
A Smart file handler. auto judge local/afs by path A Smart file handler. auto judge local/afs by path
""" """
def __init__(self, config): def __init__(self, config):
"""R """R
""" """
if 'fs_name' in config: if 'fs_name' in config:
hadoop_home="$HADOOP_HOME" hadoop_home = "$HADOOP_HOME"
hdfs_configs = { hdfs_configs = {
"hadoop.job.ugi": config['fs_ugi'], "hadoop.job.ugi": config['fs_ugi'],
"fs.default.name": config['fs_name'] "fs.default.name": config['fs_name']
...@@ -131,7 +133,8 @@ class FileHandler(object): ...@@ -131,7 +133,8 @@ class FileHandler(object):
if mode.find('a') >= 0: if mode.find('a') >= 0:
org_content = self._hdfs_client.cat(dest_path) org_content = self._hdfs_client.cat(dest_path)
content = content + org_content content = content + org_content
self._local_fs_client.write(content, temp_local_file, mode) #fleet hdfs_client only support upload, so write tmp file self._local_fs_client.write(content, temp_local_file,
mode) # fleet hdfs_client only support upload, so write tmp file
self._hdfs_client.delete(dest_path + ".tmp") self._hdfs_client.delete(dest_path + ".tmp")
self._hdfs_client.upload(dest_path + ".tmp", temp_local_file) self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
self._hdfs_client.delete(dest_path + ".bak") self._hdfs_client.delete(dest_path + ".bak")
......
...@@ -12,9 +12,6 @@ ...@@ -12,9 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import copy
import yaml
class TableMeta(object): class TableMeta(object):
""" """
......
...@@ -12,11 +12,12 @@ ...@@ -12,11 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import datetime
import os import os
import time import time
import datetime
from paddle import fluid from paddle import fluid
from paddlerec.core.utils import fs as fs from paddlerec.core.utils import fs as fs
......
...@@ -153,7 +153,7 @@ class Model(object): ...@@ -153,7 +153,7 @@ class Model(object):
def infer_net(self): def infer_net(self):
pass pass
def get_cost_op(self): def get_avg_cost(self):
return self._cost return self._cost
``` ```
......
...@@ -13,15 +13,9 @@ ...@@ -13,15 +13,9 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
...@@ -65,7 +59,7 @@ class Model(ModelBase): ...@@ -65,7 +59,7 @@ class Model(ModelBase):
self.cost = avg_cost self.cost = avg_cost
self._metrics["acc"] = acc self._metrics["acc"] = acc
def get_cost_op(self): def get_avg_cost(self):
return self.cost return self.cost
def get_metrics(self): def get_metrics(self):
......
...@@ -12,20 +12,11 @@ ...@@ -12,20 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
import sys import sys
import collections
import os
import six
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
...@@ -47,6 +38,7 @@ class TrainReader(Reader): ...@@ -47,6 +38,7 @@ class TrainReader(Reader):
data = [int(i) for i in data] data = [int(i) for i in data]
label = [int(i) for i in label] label = [int(i) for i in label]
seq_len = [int(i) for i in seq_len] seq_len = [int(i) for i in seq_len]
print >>sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)]) print >> sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
yield [('data', data), ('label', label), ('seq_len', seq_len)] yield [('data', data), ('label', label), ('seq_len', seq_len)]
return data_iter return data_iter
...@@ -12,30 +12,27 @@ ...@@ -12,30 +12,27 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf import paddle.fluid.layers.control_flow as cf
from paddlerec.core.model import Model as ModelBase
from paddlerec.core.utils import envs
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
self.cost = None self.cost = None
self.metrics = {} self.metrics = {}
self.vocab_text_size = 11447#envs.get_global_env("vocab_text_size", None, self._namespace) self.vocab_text_size = envs.get_global_env("vocab_text_size", None, self._namespace)
self.vocab_tag_size = 4#envs.get_global_env("vocab_tag_size", None, self._namespace) self.vocab_tag_size = envs.get_global_env("vocab_tag_size", None, self._namespace)
self.emb_dim = 10#envs.get_global_env("emb_dim", None, self._namespace) self.emb_dim = envs.get_global_env("emb_dim", None, self._namespace)
self.hid_dim = 1000#envs.get_global_env("hid_dim", None, self._namespace) self.hid_dim = envs.get_global_env("hid_dim", None, self._namespace)
self.win_size = 5#envs.get_global_env("win_size", None, self._namespace) self.win_size = envs.get_global_env("win_size", None, self._namespace)
self.margin = 0.1#envs.get_global_env("margin", None, self._namespace) self.margin = envs.get_global_env("margin", None, self._namespace)
self.neg_size = 3#envs.get_global_env("neg_size", None, self._namespace) self.neg_size = envs.get_global_env("neg_size", None, self._namespace)
print self.emb_dim
def train_net(self): def train_net(self):
""" network definition """ """ network definition """
...@@ -92,18 +89,16 @@ class Model(ModelBase): ...@@ -92,18 +89,16 @@ class Model(ModelBase):
self.metrics["correct"] = correct self.metrics["correct"] = correct
self.metrics["cos_pos"] = cos_pos self.metrics["cos_pos"] = cos_pos
def get_cost_op(self): def get_avg_cost(self):
return self.cost return self.cost
def get_metrics(self): def get_metrics(self):
return self.metrics return self.metrics
def optimizer(self): def optimizer(self):
learning_rate = 0.01#envs.get_global_env("hyper_parameters.base_lr", None, self._namespace) learning_rate = envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate) sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
#sgd_optimizer.minimize(avg_cost)
return sgd_optimizer return sgd_optimizer
def infer_net(self, parameter_list): def infer_net(self, parameter_list):
self.train_net() self.train_net()
...@@ -12,20 +12,13 @@ ...@@ -12,20 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import re
import sys import sys
import collections
import os
import six
import time
import numpy as np import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
...@@ -54,9 +47,6 @@ class TrainReader(Reader): ...@@ -54,9 +47,6 @@ class TrainReader(Reader):
neg_index = rand_i neg_index = rand_i
neg_tag.append(neg_index) neg_tag.append(neg_index)
sum_n += 1 sum_n += 1
# if n > 0 and len(text) > n:
# #yield None
# return None, None, None
return text, pos_tag, neg_tag return text, pos_tag, neg_tag
def generate_sample(self, line): def generate_sample(self, line):
...@@ -66,4 +56,5 @@ class TrainReader(Reader): ...@@ -66,4 +56,5 @@ class TrainReader(Reader):
yield None yield None
return return
yield [('text', text), ('pos_tag', pos_tag), ('neg_tag', neg_tag)] yield [('text', text), ('pos_tag', pos_tag), ('neg_tag', neg_tag)]
return data_iter return data_iter
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -29,7 +28,8 @@ class Model(ModelBase): ...@@ -29,7 +28,8 @@ class Model(ModelBase):
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0) self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0) self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i in range(Neg)] self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i
in range(Neg)]
self._data_var.append(self.query) self._data_var.append(self.query)
self._data_var.append(self.doc_pos) self._data_var.append(self.doc_pos)
for input in self.doc_negs: for input in self.doc_negs:
...@@ -39,7 +39,6 @@ class Model(ModelBase): ...@@ -39,7 +39,6 @@ class Model(ModelBase):
self._data_loader = fluid.io.DataLoader.from_generator( self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
def net(self, is_infer=False): def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace) hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace) hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace)
...@@ -47,7 +46,7 @@ class Model(ModelBase): ...@@ -47,7 +46,7 @@ class Model(ModelBase):
def fc(data, hidden_layers, hidden_acts, names): def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data] fc_inputs = [data]
for i in range(len(hidden_layers)): for i in range(len(hidden_layers)):
xavier=fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i]) xavier = fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
out = fluid.layers.fc(input=fc_inputs[-1], out = fluid.layers.fc(input=fc_inputs[-1],
size=hidden_layers[i], size=hidden_layers[i],
act=hidden_acts[i], act=hidden_acts[i],
...@@ -66,12 +65,13 @@ class Model(ModelBase): ...@@ -66,12 +65,13 @@ class Model(ModelBase):
R_Q_D_ns = [] R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs): for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, ['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)]) doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts,
['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i)) R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1) concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1) prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(prob, axes=[0,1], starts=[0,0], ends=[4, 1]) hit_prob = fluid.layers.slice(prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob)) loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss) self.avg_cost = fluid.layers.mean(x=loss)
......
...@@ -14,7 +14,6 @@ ...@@ -14,7 +14,6 @@
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,10 +11,10 @@ ...@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
...@@ -37,7 +37,7 @@ class TrainReader(Reader): ...@@ -37,7 +37,7 @@ class TrainReader(Reader):
neg_docs = [] neg_docs = []
for i in range(len(features) - 2): for i in range(len(features) - 2):
feature_names.append('doc_neg_' + str(i)) feature_names.append('doc_neg_' + str(i))
neg_docs.append(map(float, features[i+2].split(','))) neg_docs.append(map(float, features[i + 2].split(',')))
yield zip(feature_names, [query] + [pos_doc] + neg_docs) yield zip(feature_names, [query] + [pos_doc] + neg_docs)
......
#! /bin/bash #! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e set -e
echo "begin to prepare data" echo "begin to prepare data"
......
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -54,4 +51,5 @@ class EvaluateReader(Reader): ...@@ -54,4 +51,5 @@ class EvaluateReader(Reader):
else: else:
output[index][1].append(padding) output[index][1].append(padding)
yield output yield output
return data_iter return data_iter
...@@ -14,10 +14,12 @@ ...@@ -14,10 +14,12 @@
import random import random
class Dataset: class Dataset:
def __init__(self): def __init__(self):
pass pass
class SyntheticDataset(Dataset): class SyntheticDataset(Dataset):
def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000): def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000):
# ids are randomly generated # ids are randomly generated
...@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset): ...@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset):
for i in range(self.title_slot_num): for i in range(self.title_slot_num):
nt_slot = generate_ids(self.ids_per_slot, nt_slot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim) self.sparse_feature_dim)
nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in nt_slot] nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in
nt_slot]
neg_title_slots += nt_slot neg_title_slots += nt_slot
yield query_slots + pos_title_slots + neg_title_slots yield query_slots + pos_title_slots + neg_title_slots
else: else:
...@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset): ...@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset):
def test(self): def test(self):
return self._reader_creator(False) return self._reader_creator(False)
if __name__ == '__main__': if __name__ == '__main__':
sparse_feature_dim = 1000001 sparse_feature_dim = 1000001
query_slots = 1 query_slots = 1
......
...@@ -12,16 +12,14 @@ ...@@ -12,16 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import math
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.layers.tensor as tensor import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf import paddle.fluid.layers.control_flow as cf
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
class BowEncoder(object): class BowEncoder(object):
""" bow-encoder """ """ bow-encoder """
...@@ -97,6 +95,7 @@ class SimpleEncoderFactory(object): ...@@ -97,6 +95,7 @@ class SimpleEncoderFactory(object):
rnn_encode = GrnnEncoder(hidden_size=enc_hid_size) rnn_encode = GrnnEncoder(hidden_size=enc_hid_size)
return rnn_encode return rnn_encode
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
...@@ -143,7 +142,8 @@ class Model(ModelBase): ...@@ -143,7 +142,8 @@ class Model(ModelBase):
self.nt_slots = [ self.nt_slots = [
fluid.data( fluid.data(
name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1, dtype='int64') name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1,
dtype='int64')
for i in range(len(self.title_encoders)) for i in range(len(self.title_encoders))
] ]
......
...@@ -11,10 +11,7 @@ ...@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -57,4 +54,5 @@ class TrainReader(Reader): ...@@ -57,4 +54,5 @@ class TrainReader(Reader):
else: else:
output[index][1].append(padding) output[index][1].append(padding)
yield output yield output
return data_iter return data_iter
...@@ -13,19 +13,19 @@ ...@@ -13,19 +13,19 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class EvaluateReader(Reader): class EvaluateReader(Reader):
def init(self): def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129', all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301'] '205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int) self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id): for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i] self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line): def generate_sample(self, line):
""" """
...@@ -41,10 +41,10 @@ class EvaluateReader(Reader): ...@@ -41,10 +41,10 @@ class EvaluateReader(Reader):
cvr = int(features[2]) cvr = int(features[2])
padding = 0 padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict] output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]: for elem in features[4:]:
field_id,feat_id = elem.strip().split(':') field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict: if field_id not in self.all_field_id_dict:
continue continue
self.all_field_id_dict[field_id][0] = True self.all_field_id_dict[field_id][0] = True
...@@ -52,7 +52,7 @@ class EvaluateReader(Reader): ...@@ -52,7 +52,7 @@ class EvaluateReader(Reader):
output[index][1].append(int(feat_id)) output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict: for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id] visited, index = self.all_field_id_dict[field_id]
if visited: if visited:
self.all_field_id_dict[field_id][0] = False self.all_field_id_dict[field_id][0] = False
else: else:
...@@ -60,4 +60,5 @@ class EvaluateReader(Reader): ...@@ -60,4 +60,5 @@ class EvaluateReader(Reader):
output.append(('ctr', [ctr])) output.append(('ctr', [ctr]))
output.append(('cvr', [cvr])) output.append(('cvr', [cvr]))
yield output yield output
return reader return reader
...@@ -11,21 +11,22 @@ ...@@ -11,21 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129', all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301'] '205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int) self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id): for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i] self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line): def generate_sample(self, line):
""" """
...@@ -37,25 +38,25 @@ class TrainReader(Reader): ...@@ -37,25 +38,25 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format This function needs to be implemented by the user, based on data format
""" """
features = line.strip().split(',') features = line.strip().split(',')
#ctr = list(map(int, features[1])) # ctr = list(map(int, features[1]))
#cvr = list(map(int, features[2])) # cvr = list(map(int, features[2]))
ctr = int(features[1]) ctr = int(features[1])
cvr = int(features[2]) cvr = int(features[2])
padding = 0 padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict] output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]: for elem in features[4:]:
field_id,feat_id = elem.strip().split(':') field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict: if field_id not in self.all_field_id_dict:
continue continue
self.all_field_id_dict[field_id][0] = True self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1] index = self.all_field_id_dict[field_id][1]
#feat_id = list(map(int, feat_id)) # feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id)) output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict: for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id] visited, index = self.all_field_id_dict[field_id]
if visited: if visited:
self.all_field_id_dict[field_id][0] = False self.all_field_id_dict[field_id][0] = False
else: else:
...@@ -63,4 +64,5 @@ class TrainReader(Reader): ...@@ -63,4 +64,5 @@ class TrainReader(Reader):
output.append(('ctr', [ctr])) output.append(('ctr', [ctr]))
output.append(('cvr', [cvr])) output.append(('cvr', [cvr]))
yield output yield output
return reader return reader
...@@ -12,25 +12,25 @@ ...@@ -12,25 +12,25 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
import numpy as np
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
def fc(self,tag, data, out_dim, active='prelu'): def fc(self, tag, data, out_dim, active='prelu'):
init_stddev = 1.0 init_stddev = 1.0
scales = 1.0 / np.sqrt(data.shape[1]) scales = 1.0 / np.sqrt(data.shape[1])
p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag, p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag,
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=init_stddev * scales)) initializer=fluid.initializer.NormalInitializer(loc=0.0,
scale=init_stddev * scales))
b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1)) b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
...@@ -38,13 +38,13 @@ class Model(ModelBase): ...@@ -38,13 +38,13 @@ class Model(ModelBase):
size=out_dim, size=out_dim,
act=active, act=active,
param_attr=p_attr, param_attr=p_attr,
bias_attr =b_attr, bias_attr=b_attr,
name=tag) name=tag)
return out return out
def input_data(self): def input_data(self):
sparse_input_ids = [ sparse_input_ids = [
fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0,23) fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0, 23)
] ]
label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64") label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64") label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
...@@ -63,10 +63,11 @@ class Model(ModelBase): ...@@ -63,10 +63,11 @@ class Model(ModelBase):
size=[vocab_size, embed_size], size=[vocab_size, embed_size],
param_attr=fluid.ParamAttr(name='dis_emb', param_attr=fluid.ParamAttr(name='dis_emb',
learning_rate=5, learning_rate=5,
initializer=fluid.initializer.Xavier(fan_in=embed_size,fan_out=embed_size) initializer=fluid.initializer.Xavier(
fan_in=embed_size, fan_out=embed_size)
), ),
is_sparse=True) is_sparse=True)
field_emb = fluid.layers.sequence_pool(input=feat_emb,pool_type='sum') field_emb = fluid.layers.sequence_pool(input=feat_emb, pool_type='sum')
emb.append(field_emb) emb.append(field_emb)
concat_emb = fluid.layers.concat(emb, axis=1) concat_emb = fluid.layers.concat(emb, axis=1)
...@@ -79,7 +80,7 @@ class Model(ModelBase): ...@@ -79,7 +80,7 @@ class Model(ModelBase):
# cvr # cvr
cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active) cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active) cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
cvr_out = self.fc('cvr_out', cvr_fc2, 2,'softmax') cvr_out = self.fc('cvr_out', cvr_fc2, 2, 'softmax')
ctr_clk = inputs[-2] ctr_clk = inputs[-2]
ctcvr_buy = inputs[-1] ctcvr_buy = inputs[-1]
...@@ -88,7 +89,7 @@ class Model(ModelBase): ...@@ -88,7 +89,7 @@ class Model(ModelBase):
cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2]) cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2])
ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one) ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one)
ctcvr_prop = fluid.layers.concat(input=[1-ctcvr_prop_one,ctcvr_prop_one], axis = 1) ctcvr_prop = fluid.layers.concat(input=[1 - ctcvr_prop_one, ctcvr_prop_one], axis=1)
auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk) auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk)
auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy) auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy)
...@@ -98,25 +99,21 @@ class Model(ModelBase): ...@@ -98,25 +99,21 @@ class Model(ModelBase):
self._infer_results["AUC_ctcvr"] = auc_ctcvr self._infer_results["AUC_ctcvr"] = auc_ctcvr
return return
loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk) loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk)
loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy) loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy)
cost = loss_ctr + loss_ctcvr cost = loss_ctr + loss_ctcvr
avg_cost = fluid.layers.mean(cost) avg_cost = fluid.layers.mean(cost)
self._cost = avg_cost self._cost = avg_cost
self._metrics["AUC_ctr"] = auc_ctr self._metrics["AUC_ctr"] = auc_ctr
self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr
self._metrics["AUC_ctcvr"] = auc_ctcvr self._metrics["AUC_ctcvr"] = auc_ctcvr
self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr
def train_net(self): def train_net(self):
input_data = self.input_data() input_data = self.input_data()
self.net(input_data) self.net(input_data)
def infer_net(self): def infer_net(self):
self._infer_data_var = self.input_data() self._infer_data_var = self.input_data()
self._infer_data_loader = fluid.io.DataLoader.from_generator( self._infer_data_loader = fluid.io.DataLoader.from_generator(
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader): class TrainReader(Reader):
...@@ -44,8 +43,8 @@ class TrainReader(Reader): ...@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0] label_marital = [1, 0]
elif int(l[0]) == 1: elif int(l[0]) == 1:
label_marital = [0, 1] label_marital = [0, 1]
#label_income = np.array(label_income) # label_income = np.array(label_income)
#label_marital = np.array(label_marital) # label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"] feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital]) yield zip(feature_name, [data] + [label_income] + [label_marital])
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -50,8 +49,7 @@ class Model(ModelBase): ...@@ -50,8 +49,7 @@ class Model(ModelBase):
name='expert_' + str(i)) name='expert_' + str(i))
expert_outputs.append(expert_output) expert_outputs.append(expert_output)
expert_concat = fluid.layers.concat(expert_outputs, axis=1) expert_concat = fluid.layers.concat(expert_outputs, axis=1)
expert_concat = fluid.layers.reshape(expert_concat,[-1, expert_num, expert_size]) expert_concat = fluid.layers.reshape(expert_concat, [-1, expert_num, expert_size])
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper # g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
output_layers = [] output_layers = []
...@@ -79,19 +77,22 @@ class Model(ModelBase): ...@@ -79,19 +77,22 @@ class Model(ModelBase):
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer: if is_infer:
self._infer_results["AUC_income"] = auc_income self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital self._infer_results["AUC_marital"] = auc_marital
return return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
avg_cost_income = fluid.layers.mean(x=cost_income) avg_cost_income = fluid.layers.mean(x=cost_income)
avg_cost_marital = fluid.layers.mean(x=cost_marital) avg_cost_marital = fluid.layers.mean(x=cost_marital)
...@@ -104,10 +105,8 @@ class Model(ModelBase): ...@@ -104,10 +105,8 @@ class Model(ModelBase):
self._metrics["AUC_marital"] = auc_marital self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2 self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self): def train_net(self):
self.MMOE() self.MMOE()
def infer_net(self): def infer_net(self):
self.MMOE(is_infer=True) self.MMOE(is_infer=True)
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,11 +11,10 @@ ...@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader): class TrainReader(Reader):
...@@ -44,8 +43,8 @@ class TrainReader(Reader): ...@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0] label_marital = [1, 0]
elif int(l[0]) == 1: elif int(l[0]) == 1:
label_marital = [0, 1] label_marital = [0, 1]
#label_income = np.array(label_income) # label_income = np.array(label_income)
#label_marital = np.array(label_marital) # label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"] feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital]) yield zip(feature_name, [data] + [label_income] + [label_marital])
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -47,7 +46,6 @@ class Model(ModelBase): ...@@ -47,7 +46,6 @@ class Model(ModelBase):
bias_attr=fluid.ParamAttr(learning_rate=1.0), bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='bottom_output') name='bottom_output')
# Build tower layer from bottom layer # Build tower layer from bottom layer
output_layers = [] output_layers = []
for index in range(tower_nums): for index in range(tower_nums):
...@@ -61,23 +59,26 @@ class Model(ModelBase): ...@@ -61,23 +59,26 @@ class Model(ModelBase):
name='output_layer_' + str(index)) name='output_layer_' + str(index))
output_layers.append(output_layer) output_layers.append(output_layer)
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15) pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15) pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2]) label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2]) label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64')) auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64')) label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer: if is_infer:
self._infer_results["AUC_income"] = auc_income self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital self._infer_results["AUC_marital"] = auc_marital
return return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True) cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True) cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1) cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1)
avg_cost = fluid.layers.mean(x=cost) avg_cost = fluid.layers.mean(x=cost)
...@@ -88,10 +89,8 @@ class Model(ModelBase): ...@@ -88,10 +89,8 @@ class Model(ModelBase):
self._metrics["AUC_marital"] = auc_marital self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2 self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self): def train_net(self):
self.model() self.model()
def infer_net(self): def infer_net(self):
self.model(is_infer=True) self.model(is_infer=True)
...@@ -11,6 +11,7 @@ ...@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
......
...@@ -11,18 +11,19 @@ ...@@ -11,18 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import math import math
import sys import os
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from collections import Counter
import os from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
...@@ -83,6 +84,7 @@ class TrainReader(Reader): ...@@ -83,6 +84,7 @@ class TrainReader(Reader):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
label_feat_list = self._process_line(line) label_feat_list = self._process_line(line)
yield list(zip(self.label_feat_names, label_feat_list)) yield list(zip(self.label_feat_names, label_feat_list))
......
...@@ -12,12 +12,13 @@ ...@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from collections import OrderedDict
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
from collections import OrderedDict
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
...@@ -141,7 +142,6 @@ class Model(ModelBase): ...@@ -141,7 +142,6 @@ class Model(ModelBase):
self._metrics["AUC"] = auc_var self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var self._metrics["BATCH_AUC"] = batch_auc_var
# logloss # logloss
logloss = fluid.layers.log_loss(self.prob, self.target_input) logloss = fluid.layers.log_loss(self.prob, self.target_input)
self.avg_logloss = fluid.layers.reduce_mean(logloss) self.avg_logloss = fluid.layers.reduce_mean(logloss)
......
...@@ -11,15 +11,18 @@ ...@@ -11,15 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
...@@ -64,6 +67,7 @@ class TrainReader(Reader): ...@@ -64,6 +67,7 @@ class TrainReader(Reader):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
feat_idx, feat_value, label = self._process_line(line) feat_idx, feat_value, label = self._process_line(line)
yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)] yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)]
......
...@@ -12,9 +12,10 @@ ...@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid
import math import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
...@@ -32,10 +33,11 @@ class Model(ModelBase): ...@@ -32,10 +33,11 @@ class Model(ModelBase):
# ------------------------- network input -------------------------- # ------------------------- network input --------------------------
num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace) num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64') # None * num_field(defalut:39) raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field],
dtype='int64') # None * num_field(defalut:39)
raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field
self.label = fluid.data(name='label', shape=[None, 1], dtype='float32') # None * 1 self.label = fluid.data(name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx,[-1, 1]) # (None * num_field) * 1 feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1
feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1]) # None * num_field * 1 feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
# ------------------------- set _data_var -------------------------- # ------------------------- set _data_var --------------------------
...@@ -47,7 +49,7 @@ class Model(ModelBase): ...@@ -47,7 +49,7 @@ class Model(ModelBase):
self._data_loader = fluid.io.DataLoader.from_generator( self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False) feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
#------------------------- first order term -------------------------- # ------------------------- first order term --------------------------
reg = envs.get_global_env("hyper_parameters.reg", 1e-4, self._namespace) reg = envs.get_global_env("hyper_parameters.reg", 1e-4, self._namespace)
first_weights_re = fluid.embedding( first_weights_re = fluid.embedding(
...@@ -65,7 +67,7 @@ class Model(ModelBase): ...@@ -65,7 +67,7 @@ class Model(ModelBase):
first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1 first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1
y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1) y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1)
#------------------------- second order term -------------------------- # ------------------------- second order term --------------------------
feat_embeddings_re = fluid.embedding( feat_embeddings_re = fluid.embedding(
input=feat_idx, input=feat_idx,
...@@ -99,8 +101,7 @@ class Model(ModelBase): ...@@ -99,8 +101,7 @@ class Model(ModelBase):
summed_features_emb_square - squared_sum_features_emb, 1, summed_features_emb_square - squared_sum_features_emb, 1,
keep_dim=True) # None * 1 keep_dim=True) # None * 1
# ------------------------- DNN --------------------------
#------------------------- DNN --------------------------
layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace) layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
act = envs.get_global_env("hyper_parameters.act", None, self._namespace) act = envs.get_global_env("hyper_parameters.act", None, self._namespace)
...@@ -128,21 +129,21 @@ class Model(ModelBase): ...@@ -128,21 +129,21 @@ class Model(ModelBase):
initializer=fluid.initializer.TruncatedNormalInitializer( initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_))) loc=0.0, scale=init_value_)))
#------------------------- DeepFM -------------------------- # ------------------------- DeepFM --------------------------
self.predict = fluid.layers.sigmoid(y_first_order + y_second_order + y_dnn) self.predict = fluid.layers.sigmoid(y_first_order + y_second_order + y_dnn)
def train_net(self): def train_net(self):
self.deepfm_net() self.deepfm_net()
#------------------------- Cost(logloss) -------------------------- # ------------------------- Cost(logloss) --------------------------
cost = fluid.layers.log_loss(input=self.predict, label=self.label) cost = fluid.layers.log_loss(input=self.predict, label=self.label)
avg_cost = fluid.layers.reduce_sum(cost) avg_cost = fluid.layers.reduce_sum(cost)
self._cost = avg_cost self._cost = avg_cost
#------------------------- Metric(Auc) -------------------------- # ------------------------- Metric(Auc) --------------------------
predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1) predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
label_int = fluid.layers.cast(self.label, 'int64') label_int = fluid.layers.cast(self.label, 'int64')
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
......
...@@ -13,16 +13,20 @@ ...@@ -13,16 +13,20 @@
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
import os import os
import random import random
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
self.train_data_path = envs.get_global_env("train_data_path", None, "train.reader") self.train_data_path = envs.get_global_env("train_data_path", None, "train.reader")
...@@ -43,9 +47,6 @@ class TrainReader(Reader): ...@@ -43,9 +47,6 @@ class TrainReader(Reader):
self.batch_size = envs.get_global_env("batch_size", 32, "train.reader") self.batch_size = envs.get_global_env("batch_size", 32, "train.reader")
self.group_size = self.batch_size * 20 self.group_size = self.batch_size * 20
def _process_line(self, line): def _process_line(self, line):
line = line.strip().split(';') line = line.strip().split(';')
hist = line[0].split() hist = line[0].split()
...@@ -54,13 +55,13 @@ class TrainReader(Reader): ...@@ -54,13 +55,13 @@ class TrainReader(Reader):
cate = [int(i) for i in cate] cate = [int(i) for i in cate]
return [hist, cate, [int(line[2])], [int(line[3])], [float(line[4])]] return [hist, cate, [int(line[2])], [int(line[3])], [float(line[4])]]
def generate_sample(self, line): def generate_sample(self, line):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
#feat_idx, feat_value, label = self._process_line(line) # feat_idx, feat_value, label = self._process_line(line)
yield self._process_line(line) yield self._process_line(line)
return data_iter return data_iter
...@@ -127,5 +128,3 @@ class TrainReader(Reader): ...@@ -127,5 +128,3 @@ class TrainReader(Reader):
data_set = self.base_read(files) data_set = self.base_read(files)
random.shuffle(data_set) random.shuffle(data_set)
return self.batch_reader(data_set, self.batch_size, self.batch_size * 20) return self.batch_reader(data_set, self.batch_size, self.batch_size * 20)
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
# limitations under the License. # limitations under the License.
import math import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
......
...@@ -12,9 +12,10 @@ ...@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid
import math import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
...@@ -26,8 +27,12 @@ class Model(ModelBase): ...@@ -26,8 +27,12 @@ class Model(ModelBase):
def wide_part(self, data): def wide_part(self, data):
out = fluid.layers.fc(input=data, out = fluid.layers.fc(input=data,
size=1, size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(data.shape[1])), param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0,
regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4)), scale=1.0 / math.sqrt(
data.shape[
1])),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)),
act=None, act=None,
name='wide') name='wide')
return out return out
...@@ -35,7 +40,10 @@ class Model(ModelBase): ...@@ -35,7 +40,10 @@ class Model(ModelBase):
def fc(self, data, hidden_units, active, tag): def fc(self, data, hidden_units, active, tag):
output = fluid.layers.fc(input=data, output = fluid.layers.fc(input=data,
size=hidden_units, size=hidden_units,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(data.shape[1]))), param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0,
scale=1.0 / math.sqrt(
data.shape[
1]))),
act=active, act=active,
name=tag) name=tag)
...@@ -64,13 +72,15 @@ class Model(ModelBase): ...@@ -64,13 +72,15 @@ class Model(ModelBase):
wide_model = fluid.layers.fc(input=wide_output, wide_model = fluid.layers.fc(input=wide_output,
size=1, size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)), param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None, act=None,
name='w_wide') name='w_wide')
deep_model = fluid.layers.fc(input=deep_output, deep_model = fluid.layers.fc(input=deep_output,
size=1, size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)), param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None, act=None,
name='w_deep') name='w_deep')
......
...@@ -11,15 +11,17 @@ ...@@ -11,15 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from paddlerec.core.reader import Reader
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
pass pass
...@@ -28,7 +30,7 @@ class TrainReader(Reader): ...@@ -28,7 +30,7 @@ class TrainReader(Reader):
line = line.strip().split(',') line = line.strip().split(',')
features = list(map(float, line)) features = list(map(float, line))
wide_feat = features[0:8] wide_feat = features[0:8]
deep_feat = features[8:58+8] deep_feat = features[8:58 + 8]
label = features[-1] label = features[-1]
return wide_feat, deep_feat, [label] return wide_feat, deep_feat, [label]
...@@ -36,6 +38,7 @@ class TrainReader(Reader): ...@@ -36,6 +38,7 @@ class TrainReader(Reader):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def data_iter(): def data_iter():
wide_feat, deep_deat, label = self._process_line(line) wide_feat, deep_deat, label = self._process_line(line)
yield [('wide_input', wide_feat), ('deep_input', deep_deat), ('label', label)] yield [('wide_input', wide_feat), ('deep_input', deep_deat), ('label', label)]
......
...@@ -11,15 +11,17 @@ ...@@ -11,15 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try: try:
import cPickle as pickle import cPickle as pickle
except ImportError: except ImportError:
import pickle import pickle
from paddlerec.core.reader import Reader
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
pass pass
......
...@@ -13,7 +13,6 @@ ...@@ -13,7 +13,6 @@
# limitations under the License. # limitations under the License.
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
......
#! /bin/bash #! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e set -e
echo "begin to download data" echo "begin to download data"
......
...@@ -11,10 +11,12 @@ ...@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io
import copy import copy
import random import random
import numpy as np
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -120,8 +122,9 @@ class EvaluateReader(Reader): ...@@ -120,8 +122,9 @@ class EvaluateReader(Reader):
else: else:
# Due to fixed batch_size, discard the remaining ins # Due to fixed batch_size, discard the remaining ins
return return
#cur_batch = remain_data[i:] # cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size) # yield self.make_data(cur_batch, group_remain % batch_size)
return _reader return _reader
def generate_batch_from_trainfiles(self, files): def generate_batch_from_trainfiles(self, files):
...@@ -132,4 +135,5 @@ class EvaluateReader(Reader): ...@@ -132,4 +135,5 @@ class EvaluateReader(Reader):
def generate_sample(self, line): def generate_sample(self, line):
def data_iter(): def data_iter():
yield [] yield []
return data_iter return data_iter
...@@ -12,8 +12,9 @@ ...@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import math import math
import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
...@@ -28,13 +29,13 @@ class Model(ModelBase): ...@@ -28,13 +29,13 @@ class Model(ModelBase):
def init_config(self): def init_config(self):
self._fetch_interval = 1 self._fetch_interval = 1
self.items_num, self.ins_num = self.config_read(envs.get_global_env("hyper_parameters.config_path", None, self._namespace)) self.items_num, self.ins_num = self.config_read(
envs.get_global_env("hyper_parameters.config_path", None, self._namespace))
self.train_batch_size = envs.get_global_env("batch_size", None, "train.reader") self.train_batch_size = envs.get_global_env("batch_size", None, "train.reader")
self.evaluate_batch_size = envs.get_global_env("batch_size", None, "evaluate.reader") self.evaluate_batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")
self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace) self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps", None, self._namespace) self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps", None, self._namespace)
def config_read(self, config_path=None): def config_read(self, config_path=None):
if config_path is None: if config_path is None:
raise ValueError("please set train.model.hyper_parameters.config_path at first") raise ValueError("please set train.model.hyper_parameters.config_path at first")
...@@ -47,31 +48,31 @@ class Model(ModelBase): ...@@ -47,31 +48,31 @@ class Model(ModelBase):
self.items = fluid.data( self.items = fluid.data(
name="items", name="items",
shape=[bs, -1], shape=[bs, -1],
dtype="int64") #[batch_size, uniq_max] dtype="int64") # [batch_size, uniq_max]
self.seq_index = fluid.data( self.seq_index = fluid.data(
name="seq_index", name="seq_index",
shape=[bs, -1, 2], shape=[bs, -1, 2],
dtype="int32") #[batch_size, seq_max, 2] dtype="int32") # [batch_size, seq_max, 2]
self.last_index = fluid.data( self.last_index = fluid.data(
name="last_index", name="last_index",
shape=[bs, 2], shape=[bs, 2],
dtype="int32") #[batch_size, 2] dtype="int32") # [batch_size, 2]
self.adj_in = fluid.data( self.adj_in = fluid.data(
name="adj_in", name="adj_in",
shape=[bs, -1, -1], shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max] dtype="float32") # [batch_size, seq_max, seq_max]
self.adj_out = fluid.data( self.adj_out = fluid.data(
name="adj_out", name="adj_out",
shape=[bs, -1, -1], shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max] dtype="float32") # [batch_size, seq_max, seq_max]
self.mask = fluid.data( self.mask = fluid.data(
name="mask", name="mask",
shape=[bs, -1, 1], shape=[bs, -1, 1],
dtype="float32") #[batch_size, seq_max, 1] dtype="float32") # [batch_size, seq_max, 1]
self.label = fluid.data( self.label = fluid.data(
name="label", name="label",
shape=[bs, 1], shape=[bs, 1],
dtype="int64") #[batch_size, 1] dtype="int64") # [batch_size, 1]
res = [self.items, self.seq_index, self.last_index, self.adj_in, self.adj_out, self.mask, self.label] res = [self.items, self.seq_index, self.last_index, self.adj_in, self.adj_out, self.mask, self.label]
return res return res
...@@ -113,7 +114,7 @@ class Model(ModelBase): ...@@ -113,7 +114,7 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform( param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)), low=-stdv, high=stdv)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform( bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, uniq_max, h] low=-stdv, high=stdv))) # [batch_size, uniq_max, h]
state_out = layers.fc( state_out = layers.fc(
input=pre_state, input=pre_state,
name="state_out", name="state_out",
...@@ -123,10 +124,10 @@ class Model(ModelBase): ...@@ -123,10 +124,10 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform( param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)), low=-stdv, high=stdv)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform( bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, uniq_max, h] low=-stdv, high=stdv))) # [batch_size, uniq_max, h]
state_adj_in = layers.matmul(self.adj_in, state_in) #[batch_size, uniq_max, h] state_adj_in = layers.matmul(self.adj_in, state_in) # [batch_size, uniq_max, h]
state_adj_out = layers.matmul(self.adj_out, state_out) #[batch_size, uniq_max, h] state_adj_out = layers.matmul(self.adj_out, state_out) # [batch_size, uniq_max, h]
gru_input = layers.concat([state_adj_in, state_adj_out], axis=2) gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)
...@@ -154,7 +155,7 @@ class Model(ModelBase): ...@@ -154,7 +155,7 @@ class Model(ModelBase):
num_flatten_dims=2, num_flatten_dims=2,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, seq_max, h] low=-stdv, high=stdv))) # [batch_size, seq_max, h]
last_fc = layers.fc( last_fc = layers.fc(
input=last, input=last,
name="last_fc", name="last_fc",
...@@ -164,21 +165,21 @@ class Model(ModelBase): ...@@ -164,21 +165,21 @@ class Model(ModelBase):
num_flatten_dims=1, num_flatten_dims=1,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[bathc_size, h] low=-stdv, high=stdv))) # [bathc_size, h]
seq_fc_t = layers.transpose( seq_fc_t = layers.transpose(
seq_fc, perm=[1, 0, 2]) #[seq_max, batch_size, h] seq_fc, perm=[1, 0, 2]) # [seq_max, batch_size, h]
add = layers.elementwise_add( add = layers.elementwise_add(
seq_fc_t, last_fc) #[seq_max, batch_size, h] seq_fc_t, last_fc) # [seq_max, batch_size, h]
b = layers.create_parameter( b = layers.create_parameter(
shape=[hidden_size], shape=[hidden_size],
dtype='float32', dtype='float32',
default_initializer=fluid.initializer.Constant(value=0.0)) #[h] default_initializer=fluid.initializer.Constant(value=0.0)) # [h]
add = layers.elementwise_add(add, b) #[seq_max, batch_size, h] add = layers.elementwise_add(add, b) # [seq_max, batch_size, h]
add_sigmoid = layers.sigmoid(add) #[seq_max, batch_size, h] add_sigmoid = layers.sigmoid(add) # [seq_max, batch_size, h]
add_sigmoid = layers.transpose( add_sigmoid = layers.transpose(
add_sigmoid, perm=[1, 0, 2]) #[batch_size, seq_max, h] add_sigmoid, perm=[1, 0, 2]) # [batch_size, seq_max, h]
weight = layers.fc( weight = layers.fc(
input=add_sigmoid, input=add_sigmoid,
...@@ -189,13 +190,13 @@ class Model(ModelBase): ...@@ -189,13 +190,13 @@ class Model(ModelBase):
bias_attr=False, bias_attr=False,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform( initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, seq_max, 1] low=-stdv, high=stdv))) # [batch_size, seq_max, 1]
weight *= self.mask weight *= self.mask
weight_mask = layers.elementwise_mul(seq, weight, axis=0) #[batch_size, seq_max, h] weight_mask = layers.elementwise_mul(seq, weight, axis=0) # [batch_size, seq_max, h]
global_attention = layers.reduce_sum(weight_mask, dim=1) #[batch_size, h] global_attention = layers.reduce_sum(weight_mask, dim=1) # [batch_size, h]
final_attention = layers.concat( final_attention = layers.concat(
[global_attention, last], axis=1) #[batch_size, 2*h] [global_attention, last], axis=1) # [batch_size, 2*h]
final_attention_fc = layers.fc( final_attention_fc = layers.fc(
input=final_attention, input=final_attention,
name="final_attention_fc", name="final_attention_fc",
...@@ -203,7 +204,7 @@ class Model(ModelBase): ...@@ -203,7 +204,7 @@ class Model(ModelBase):
bias_attr=False, bias_attr=False,
act=None, act=None,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform( param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, h] low=-stdv, high=stdv))) # [batch_size, h]
# all_vocab = layers.create_global_var( # all_vocab = layers.create_global_var(
# shape=[items_num - 1], # shape=[items_num - 1],
...@@ -220,13 +221,13 @@ class Model(ModelBase): ...@@ -220,13 +221,13 @@ class Model(ModelBase):
name="emb", name="emb",
initializer=fluid.initializer.Uniform( initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)), low=-stdv, high=stdv)),
size=[items_num, hidden_size]) #[all_vocab, h] size=[items_num, hidden_size]) # [all_vocab, h]
logits = layers.matmul( logits = layers.matmul(
x=final_attention_fc, y=all_emb, x=final_attention_fc, y=all_emb,
transpose_y=True) #[batch_size, all_vocab] transpose_y=True) # [batch_size, all_vocab]
softmax = layers.softmax_with_cross_entropy( softmax = layers.softmax_with_cross_entropy(
logits=logits, label=self.label) #[batch_size, 1] logits=logits, label=self.label) # [batch_size, 1]
self.loss = layers.reduce_mean(softmax) # [1] self.loss = layers.reduce_mean(softmax) # [1]
self.acc = layers.accuracy(input=logits, label=self.label, k=20) self.acc = layers.accuracy(input=logits, label=self.label, k=20)
......
...@@ -11,10 +11,12 @@ ...@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io
import copy import copy
import random import random
import numpy as np
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -120,8 +122,9 @@ class TrainReader(Reader): ...@@ -120,8 +122,9 @@ class TrainReader(Reader):
else: else:
# Due to fixed batch_size, discard the remaining ins # Due to fixed batch_size, discard the remaining ins
return return
#cur_batch = remain_data[i:] # cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size) # yield self.make_data(cur_batch, group_remain % batch_size)
return _reader return _reader
def generate_batch_from_trainfiles(self, files): def generate_batch_from_trainfiles(self, files):
...@@ -132,4 +135,5 @@ class TrainReader(Reader): ...@@ -132,4 +135,5 @@ class TrainReader(Reader):
def generate_sample(self, line): def generate_sample(self, line):
def data_iter(): def data_iter():
yield [] yield []
return data_iter return data_iter
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -87,10 +86,8 @@ class Model(ModelBase): ...@@ -87,10 +86,8 @@ class Model(ModelBase):
self._metrics["cost"] = avg_cost self._metrics["cost"] = avg_cost
self._metrics["acc"] = acc self._metrics["acc"] = acc
def train_net(self): def train_net(self):
self.all_vocab_network() self.all_vocab_network()
def infer_net(self): def infer_net(self):
self.all_vocab_network(is_infer=True) self.all_vocab_network(is_infer=True)
...@@ -11,10 +11,10 @@ ...@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader): class EvaluateReader(Reader):
......
...@@ -11,10 +11,10 @@ ...@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader): class TrainReader(Reader):
......
...@@ -12,15 +12,12 @@ ...@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import paddle.fluid as fluid import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid.layers.tensor as tensor import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.io as io
import paddle.fluid.layers.control_flow as cf import paddle.fluid.layers.control_flow as cf
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class BowEncoder(object): class BowEncoder(object):
...@@ -54,6 +51,7 @@ class GrnnEncoder(object): ...@@ -54,6 +51,7 @@ class GrnnEncoder(object):
bias_attr=self.param_name + ".bias") bias_attr=self.param_name + ".bias")
return fluid.layers.sequence_pool(input=gru_h, pool_type='max') return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
class PairwiseHingeLoss(object): class PairwiseHingeLoss(object):
def __init__(self, margin=0.8): def __init__(self, margin=0.8):
self.margin = margin self.margin = margin
...@@ -70,6 +68,7 @@ class PairwiseHingeLoss(object): ...@@ -70,6 +68,7 @@ class PairwiseHingeLoss(object):
loss_part2) loss_part2)
return loss_part3 return loss_part3
class Model(ModelBase): class Model(ModelBase):
def __init__(self, config): def __init__(self, config):
ModelBase.__init__(self, config) ModelBase.__init__(self, config)
...@@ -80,7 +79,6 @@ class Model(ModelBase): ...@@ -80,7 +79,6 @@ class Model(ModelBase):
return correct return correct
def train(self): def train(self):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace) vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace) emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace) hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
...@@ -129,11 +127,9 @@ class Model(ModelBase): ...@@ -129,11 +127,9 @@ class Model(ModelBase):
self._metrics["correct"] = correct self._metrics["correct"] = correct
self._metrics["hinge_loss"] = hinge_loss self._metrics["hinge_loss"] = hinge_loss
def train_net(self): def train_net(self):
self.train() self.train()
def infer(self): def infer(self):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace) vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace) emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
...@@ -173,6 +169,5 @@ class Model(ModelBase): ...@@ -173,6 +169,5 @@ class Model(ModelBase):
self._infer_results['recall20'] = acc self._infer_results['recall20'] = acc
def infer_net(self): def infer_net(self):
self.infer() self.infer()
...@@ -11,19 +11,19 @@ ...@@ -11,19 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
import numpy as np
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
import random
import numpy as np
class EvaluateReader(Reader): class EvaluateReader(Reader):
def init(self): def init(self):
self.vocab_size = envs.get_global_env("vocab_size", 10, "train.model.hyper_parameters") self.vocab_size = envs.get_global_env("vocab_size", 10, "train.model.hyper_parameters")
def generate_sample(self, line): def generate_sample(self, line):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
...@@ -39,6 +39,6 @@ class EvaluateReader(Reader): ...@@ -39,6 +39,6 @@ class EvaluateReader(Reader):
src = conv_ids[:boundary] src = conv_ids[:boundary]
pos_tgt = [conv_ids[boundary]] pos_tgt = [conv_ids[boundary]]
feature_name = ["user", "all_item", "p_item"] feature_name = ["user", "all_item", "p_item"]
yield zip(feature_name, [src] + [np.arange(self.vocab_size).astype("int64").tolist()]+ [pos_tgt]) yield zip(feature_name, [src] + [np.arange(self.vocab_size).astype("int64").tolist()] + [pos_tgt])
return reader return reader
...@@ -11,12 +11,13 @@ ...@@ -11,12 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import print_function from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import random import random
from paddlerec.core.reader import Reader
class TrainReader(Reader): class TrainReader(Reader):
def init(self): def init(self):
...@@ -25,7 +26,6 @@ class TrainReader(Reader): ...@@ -25,7 +26,6 @@ class TrainReader(Reader):
def sample_neg_from_seq(self, seq): def sample_neg_from_seq(self, seq):
return seq[random.randint(0, len(seq) - 1)] return seq[random.randint(0, len(seq) - 1)]
def generate_sample(self, line): def generate_sample(self, line):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
......
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import math
import numpy as np import numpy as np
import paddle.fluid as fluid import paddle.fluid as fluid
......
#! /bin/bash #! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# download train_data # download train_data
mkdir raw_data mkdir raw_data
wget --no-check-certificate https://paddlerec.bj.bcebos.com/word2vec/1-billion-word-language-modeling-benchmark-r13output.tar wget --no-check-certificate https://paddlerec.bj.bcebos.com/word2vec/1-billion-word-language-modeling-benchmark-r13output.tar
......
...@@ -13,13 +13,15 @@ ...@@ -13,13 +13,15 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import io
import math
import os import os
import random import random
import re import re
import six import six
import argparse import argparse
import io
import math
prog = re.compile("[^a-z ]", flags=0) prog = re.compile("[^a-z ]", flags=0)
...@@ -73,7 +75,7 @@ def parse_args(): ...@@ -73,7 +75,7 @@ def parse_args():
def text_strip(text): def text_strip(text):
#English Preprocess Rule # English Preprocess Rule
return prog.sub("", text.lower()) return prog.sub("", text.lower())
...@@ -115,7 +117,7 @@ def filter_corpus(args): ...@@ -115,7 +117,7 @@ def filter_corpus(args):
word_all_count = 0 word_all_count = 0
id_counts = [] id_counts = []
word_id = 0 word_id = 0
#read dict # read dict
with io.open(args.dict_path, 'r', encoding='utf-8') as f: with io.open(args.dict_path, 'r', encoding='utf-8') as f:
for line in f: for line in f:
word, count = line.split()[0], int(line.split()[1]) word, count = line.split()[0], int(line.split()[1])
...@@ -125,13 +127,13 @@ def filter_corpus(args): ...@@ -125,13 +127,13 @@ def filter_corpus(args):
id_counts.append(count) id_counts.append(count)
word_all_count += count word_all_count += count
#write word2id file # write word2id file
print("write word2id file to : " + args.dict_path + "_word_to_id_") print("write word2id file to : " + args.dict_path + "_word_to_id_")
with io.open( with io.open(
args.dict_path + "_word_to_id_", 'w+', encoding='utf-8') as fid: args.dict_path + "_word_to_id_", 'w+', encoding='utf-8') as fid:
for k, v in word_to_id_.items(): for k, v in word_to_id_.items():
fid.write(k + " " + str(v) + '\n') fid.write(k + " " + str(v) + '\n')
#filter corpus and convert id # filter corpus and convert id
if not os.path.exists(args.output_corpus_dir): if not os.path.exists(args.output_corpus_dir):
os.makedirs(args.output_corpus_dir) os.makedirs(args.output_corpus_dir)
for file in os.listdir(args.input_corpus_dir): for file in os.listdir(args.input_corpus_dir):
...@@ -200,7 +202,7 @@ def build_dict(args): ...@@ -200,7 +202,7 @@ def build_dict(args):
for item in item_to_remove: for item in item_to_remove:
unk_sum += word_count[item] unk_sum += word_count[item]
del word_count[item] del word_count[item]
#sort by count # sort by count
word_count[native_to_unicode('<UNK>')] = unk_sum word_count[native_to_unicode('<UNK>')] = unk_sum
word_count = sorted( word_count = sorted(
word_count.items(), key=lambda word_count: -word_count[1]) word_count.items(), key=lambda word_count: -word_count[1])
...@@ -228,12 +230,13 @@ def data_split(args): ...@@ -228,12 +230,13 @@ def data_split(args):
print("contents: ", str(len(contents))) print("contents: ", str(len(contents)))
print("lines_per_file: ", str(lines_per_file)) print("lines_per_file: ", str(lines_per_file))
for i in range(1, num+1): for i in range(1, num + 1):
with open(os.path.join(new_data_dir, "part_" + str(i)), 'w') as fout: with open(os.path.join(new_data_dir, "part_" + str(i)), 'w') as fout:
data = contents[(i-1)*lines_per_file:min(i*lines_per_file,len(contents))] data = contents[(i - 1) * lines_per_file:min(i * lines_per_file, len(contents))]
for line in data: for line in data:
fout.write(line) fout.write(line)
if __name__ == "__main__": if __name__ == "__main__":
args = parse_args() args = parse_args()
if args.build_dict: if args.build_dict:
......
...@@ -11,9 +11,11 @@ ...@@ -11,9 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io import io
import six import six
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -47,18 +49,15 @@ class EvaluateReader(Reader): ...@@ -47,18 +49,15 @@ class EvaluateReader(Reader):
return True return True
return False return False
def _to_unicode(self, s, ignore_errors=False): def _to_unicode(self, s, ignore_errors=False):
if self._is_unicode(s): if self._is_unicode(s):
return s return s
error_mode = "ignore" if ignore_errors else "strict" error_mode = "ignore" if ignore_errors else "strict"
return s.decode("utf-8", errors=error_mode) return s.decode("utf-8", errors=error_mode)
def strip_lines(self, line, vocab): def strip_lines(self, line, vocab):
return self._replace_oov(vocab, self.native_to_unicode(line)) return self._replace_oov(vocab, self.native_to_unicode(line))
def _replace_oov(self, original_vocab, line): def _replace_oov(self, original_vocab, line):
"""Replace out-of-vocab words with "<UNK>". """Replace out-of-vocab words with "<UNK>".
This maintains compatibility with published results. This maintains compatibility with published results.
...@@ -76,5 +75,7 @@ class EvaluateReader(Reader): ...@@ -76,5 +75,7 @@ class EvaluateReader(Reader):
def reader(): def reader():
features = self.strip_lines(line.lower(), self.word_to_id) features = self.strip_lines(line.lower(), self.word_to_id)
features = features.split() features = features.split()
yield [('analogy_a', [self.word_to_id[features[0]]]), ('analogy_b', [self.word_to_id[features[1]]]), ('analogy_c', [self.word_to_id[features[2]]]), ('analogy_d', [self.word_to_id[features[3]]])] yield [('analogy_a', [self.word_to_id[features[0]]]), ('analogy_b', [self.word_to_id[features[1]]]),
('analogy_c', [self.word_to_id[features[2]]]), ('analogy_d', [self.word_to_id[features[3]]])]
return reader return reader
...@@ -11,8 +11,11 @@ ...@@ -11,8 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import numpy as np
import io import io
import numpy as np
from paddlerec.core.reader import Reader from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
...@@ -84,7 +87,7 @@ class TrainReader(Reader): ...@@ -84,7 +87,7 @@ class TrainReader(Reader):
output = [('input_word', [int(target_id)]), ('true_label', [int(context_id)])] output = [('input_word', [int(target_id)]), ('true_label', [int(context_id)])]
if not self.with_shuffle_batch: if not self.with_shuffle_batch:
neg_array = self.cs.searchsorted(np.random.sample(self.neg_num)) neg_array = self.cs.searchsorted(np.random.sample(self.neg_num))
output += [('neg_label', [int(str(i)) for i in neg_array ])] output += [('neg_label', [int(str(i)) for i in neg_array])]
yield output yield output
return reader
return reader
...@@ -14,8 +14,8 @@ ...@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" """
import paddle.fluid as fluid import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase from paddlerec.core.model import Model as ModelBase
...@@ -229,7 +229,7 @@ class Model(ModelBase): ...@@ -229,7 +229,7 @@ class Model(ModelBase):
act=self.act, act=self.act,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="trans.layer_fc.weight." + str(i)), name="trans.layer_fc.weight." + str(i)),
bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias."+str(i)), bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias." + str(i)),
) for i in range(self.max_layers) ) for i in range(self.max_layers)
] ]
...@@ -268,8 +268,8 @@ class Model(ModelBase): ...@@ -268,8 +268,8 @@ class Model(ModelBase):
num_flatten_dims=2, num_flatten_dims=2,
act=self.act, act=self.act,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="cls.concat_fc.weight."+str(i)), name="cls.concat_fc.weight." + str(i)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias."+str(i)) bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(i))
) for i in range(self.max_layers) ) for i in range(self.max_layers)
] ]
...@@ -458,7 +458,7 @@ class Model(ModelBase): ...@@ -458,7 +458,7 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="trans.layer_fc.weight." + str(layer_idx)), name="trans.layer_fc.weight." + str(layer_idx)),
bias_attr=fluid.ParamAttr( bias_attr=fluid.ParamAttr(
name="trans.layer_fc.bias."+str(layer_idx)), name="trans.layer_fc.bias." + str(layer_idx)),
) )
return input_layer_fc_out return input_layer_fc_out
...@@ -479,6 +479,6 @@ class Model(ModelBase): ...@@ -479,6 +479,6 @@ class Model(ModelBase):
num_flatten_dims=2, num_flatten_dims=2,
act=self.act, act=self.act,
param_attr=fluid.ParamAttr( param_attr=fluid.ParamAttr(
name="cls.concat_fc.weight."+str(layer_idx)), name="cls.concat_fc.weight." + str(layer_idx)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias."+str(layer_idx))) bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(layer_idx)))
return hidden_states_fc return hidden_states_fc
...@@ -28,6 +28,7 @@ class EvaluateReader(Reader): ...@@ -28,6 +28,7 @@ class EvaluateReader(Reader):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def reader(): def reader():
""" """
This function needs to be implemented by the user, based on data format This function needs to be implemented by the user, based on data format
......
...@@ -28,6 +28,7 @@ class TrainReader(Reader): ...@@ -28,6 +28,7 @@ class TrainReader(Reader):
""" """
Read the data line by line and process it as a dictionary Read the data line by line and process it as a dictionary
""" """
def reader(): def reader():
""" """
This function needs to be implemented by the user, based on data format This function needs to be implemented by the user, based on data format
......
...@@ -12,11 +12,11 @@ ...@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import argparse
import os import os
import subprocess import subprocess
import tempfile
import argparse
import tempfile
import yaml import yaml
from paddlerec.core.factory import TrainerFactory from paddlerec.core.factory import TrainerFactory
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" """
setup for paddle-rec. setup for paddle-rec.
""" """
import os import os
from setuptools import setup, find_packages from setuptools import setup, find_packages
import tempfile
import shutil import shutil
import tempfile
requires = [ requires = [
"paddlepaddle == 1.7.2", "paddlepaddle == 1.7.2",
...@@ -19,7 +36,7 @@ about["__author__"] = "paddle-dev" ...@@ -19,7 +36,7 @@ about["__author__"] = "paddle-dev"
about["__author_email__"] = "paddle-dev@baidu.com" about["__author_email__"] = "paddle-dev@baidu.com"
about["__url__"] = "https://github.com/PaddlePaddle/PaddleRec" about["__url__"] = "https://github.com/PaddlePaddle/PaddleRec"
readme = "..." readme = ""
def run_cmd(command): def run_cmd(command):
......
...@@ -12,15 +12,16 @@ ...@@ -12,15 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import functools
import os import os
import time import platform
import sys
import shutil import shutil
import time
import requests import requests
import sys
import tarfile import tarfile
import zipfile import zipfile
import platform
import functools
lasttime = time.time() lasttime = time.time()
FLUSH_INTERVAL = 0.1 FLUSH_INTERVAL = 0.1
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册