Commit 76471fca authored by: frankwhzhang

Merge branch 'develop' of http://gitlab.baidu.com/tangwei12/paddlerec into develop

#!/bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
###################################################
# Usage: submit.sh
......
......@@ -15,10 +15,9 @@
from __future__ import print_function
from __future__ import unicode_literals
import subprocess
import sys
import os
import copy
import os
import subprocess
from paddlerec.core.engine.engine import Engine
from paddlerec.core.factory import TrainerFactory
......
......@@ -29,4 +29,3 @@ class Engine:
@abc.abstractmethod
def run(self):
pass
......@@ -14,10 +14,11 @@
from __future__ import print_function
from __future__ import unicode_literals
import subprocess
import sys
import os
import copy
import os
import sys
import subprocess
from paddlerec.core.engine.engine import Engine
from paddlerec.core.utils import envs
......
......@@ -14,10 +14,11 @@
from __future__ import print_function
from __future__ import unicode_literals
import subprocess
import sys
import os
import copy
import os
import sys
import subprocess
from paddlerec.core.engine.engine import Engine
......
......@@ -25,17 +25,8 @@ class Layer(object):
"""
pass
def generate(self, mode, param):
"""R
"""
if mode == 'fluid':
return self.generate_fluid(param)
print('unsupport this mode: ' + mode)
return None, None
@abc.abstractmethod
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
pass
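Note: with the mode dispatch removed, every concrete layer implements `generate(param)` directly. A minimal sketch of what such a subclass could look like (the class name, config keys, and return payload here are hypothetical; real layers such as TagInputLayer below follow the same pattern):

```python
import paddle.fluid as fluid

from paddlerec.core.layer import Layer


class ConstantLayer(Layer):
    """Hypothetical layer, shown only to illustrate the new generate(param) contract."""

    def __init__(self, config):
        self._name = config['name']
        self._value = config.get('value', 0.0)
        self._shape = config.get('shape', [1])

    def generate(self, param):
        # Layers now implement generate(param) directly; the old mode dispatch
        # in generate(mode, param) / generate_fluid(param) is gone.
        output = fluid.layers.fill_constant(
            shape=self._shape, dtype='float32', value=self._value)
        return output, {'inference_param': None}
```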
......@@ -53,7 +53,7 @@ class Metric(object):
pass
@abc.abstractmethod
def get_result_to_string(self):
def __str__(self):
"""
Return:
result(string) : calculate result with string format, for output
......
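Note: `__str__` takes over the role of the removed `get_result_to_string`. A hypothetical metric illustrating the new contract (class and fields are made up for the example):

```python
from paddlerec.core.metric import Metric


class RecallMetric(Metric):
    """Hypothetical metric, shown only to illustrate the new __str__ contract."""

    def __init__(self, config):
        self._result = {'recall': 0.0, 'ins_num': 0}

    def get_result(self):
        return self._result

    def __str__(self):
        # __str__ replaces get_result_to_string(): return the calculated result
        # as a formatted string, ready for logging.
        result = self.get_result()
        return "Recall=%.6f ins_num=%d" % (result['recall'], result['ins_num'])
```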
......@@ -13,8 +13,10 @@
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.metric import Metric
......@@ -198,7 +200,7 @@ class AUCMetric(Metric):
""" """
return self._result
def get_result_to_string(self):
def __str__(self):
""" """
result = self.get_result()
result_str = "%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f " \
......
......@@ -47,7 +47,7 @@ class Model(object):
def get_infer_results(self):
return self._infer_results
def get_cost_op(self):
def get_avg_cost(self):
"""R
"""
return self._cost
......
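Note: the rename is purely an API cleanup; `get_avg_cost()` still just returns whatever `train_net()` stored in `self._cost`. A self-contained sketch (the model below is hypothetical):

```python
import paddle.fluid as fluid

from paddlerec.core.model import Model as ModelBase


class ToyModel(ModelBase):
    """Hypothetical model, shown only to illustrate where get_avg_cost() reads from."""

    def train_net(self):
        x = fluid.data(name="x", shape=[None, 4], dtype="float32")
        y = fluid.data(name="y", shape=[None, 1], dtype="float32")
        pred = fluid.layers.fc(input=x, size=1)
        # get_avg_cost() simply returns whatever train_net() stored in self._cost.
        self._cost = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    def optimizer(self):
        return fluid.optimizer.SGD(learning_rate=0.01)

    def infer_net(self):
        pass
```

Trainers then call `optimizer.minimize(model.get_avg_cost())`, exactly as the cluster/single trainer hunks below now do.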
......@@ -12,10 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import yaml
import copy
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
import yaml
from paddlerec.core.model import Model
from paddlerec.core.utils import table
......
......@@ -13,10 +13,11 @@
# limitations under the License.
import paddle.fluid as fluid
from paddlerec.core.layer import Layer
class EmbeddingInputLayer(Layer):
class EmbeddingFuseLayer(Layer):
"""R
"""
......@@ -31,7 +32,7 @@ class EmbeddingInputLayer(Layer):
self._emb_dim = self._mf_dim + 3 # append show ctr lr
self._emb_layers = []
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
show_clk = fluid.layers.concat(
......@@ -63,7 +64,7 @@ class LabelInputLayer(Layer):
self._data_type = config.get('data_type', "int64")
self._label_idx = config['label_idx']
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
label = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
......@@ -85,7 +86,7 @@ class TagInputLayer(Layer):
self._dim = config.get('dim', 1)
self._data_type = config['data_type']
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
output = fluid.layers.data(name=self._name, shape=[-1, self._dim], \
......@@ -107,7 +108,7 @@ class ParamLayer(Layer):
self._data_type = config.get('data_type', 'float32')
self._config = config
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
return self._config, {'inference_param': {'name': 'param', 'params': [], 'table_id': self._table_id}}
......@@ -125,7 +126,7 @@ class SummaryLayer(Layer):
self._data_type = config.get('data_type', 'float32')
self._config = config
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
return self._config, {'inference_param': {'name': 'summary', 'params': [], 'table_id': self._table_id}}
......@@ -143,7 +144,7 @@ class NormalizetionLayer(Layer):
self._summary = config['summary']
self._table_id = config.get('table_id', -1)
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
input_layer = param['layer'][self._input[0]]
......@@ -158,7 +159,7 @@ class NormalizetionLayer(Layer):
'params': inference_param, 'table_id': summary_layer.get('table_id', -1)}}
class NeuralLayer(Layer):
class FCLayer(Layer):
"""R
"""
......@@ -171,7 +172,7 @@ class NeuralLayer(Layer):
self._bias = config.get('bias', True)
self._act_func = config.get('act_func', None)
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
param_layer = param['layer'][self._param]
......@@ -199,7 +200,7 @@ class NeuralLayer(Layer):
'table_id': param_layer.get('table_id', -1)}}
class SigmoidLossLayer(Layer):
class LogLossLayer(Layer):
"""R
"""
......@@ -230,7 +231,7 @@ class SigmoidLossLayer(Layer):
}
}
def generate_fluid(self, param):
def generate(self, param):
"""R
"""
input_layer = param['layer'][self._input[0]]
......
......@@ -12,14 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys
import abc
import os
import time
import sys
import yaml
from paddle import fluid
from paddlerec.core.utils import envs
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
trainer implement.
           ↗ (single/cluster)  CtrTrainer
Trainer
           ↘ TranspileTrainer ↗ (for single training)          SingleTrainer/TDMSingleTrainer
                              → (for cluster training)         ClusterTrainer/TDMClusterTrainer
                              ↘ (for online learning training) OnlineLearningTrainer
"""
......@@ -25,7 +25,6 @@ import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
......@@ -83,7 +82,7 @@ class ClusterTrainer(TranspileTrainer):
strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_cost_op())
optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server():
context['status'] = 'server_pass'
......@@ -115,7 +114,7 @@ class ClusterTrainer(TranspileTrainer):
program = fluid.compiler.CompiledProgram(
fleet.main_program).with_data_parallel(
loss_name=self.model.get_cost_op().name,
loss_name=self.model.get_avg_cost().name,
build_strategy=self.strategy.get_build_strategy(),
exec_strategy=self.strategy.get_execute_strategy())
......
......@@ -11,9 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import MPISymetricRoleMaker
......@@ -22,7 +23,7 @@ from paddlerec.core.utils import envs
from paddlerec.core.trainer import Trainer
class CtrPaddleTrainer(Trainer):
class CtrTrainer(Trainer):
"""R
"""
......@@ -87,7 +88,7 @@ class CtrPaddleTrainer(Trainer):
optimizer = self.model.optimizer()
optimizer = fleet.distributed_optimizer(optimizer, strategy={"use_cvm": False})
optimizer.minimize(self.model.get_cost_op())
optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server():
context['status'] = 'server_pass'
......
......@@ -13,12 +13,12 @@
# limitations under the License.
import datetime
import json
import sys
import time
import json
import datetime
import numpy as np
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.pslib import fleet
from paddle.fluid.incubate.fleet.base.role_maker import GeneralRoleMaker
......@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"):
return wroker_numric_opt(value, env, "max")
class CtrPaddleTrainer(Trainer):
class CtrTrainer(Trainer):
"""R
"""
......@@ -129,7 +129,7 @@ class CtrPaddleTrainer(Trainer):
model = self._exector_context[executor['name']]['model']
self._metrics.update(model.get_metrics())
runnnable_scope.append(scope)
runnnable_cost_op.append(model.get_cost_op())
runnnable_cost_op.append(model.get_avg_cost())
for var in model._data_var:
if var.name in data_var_name_dict:
continue
......@@ -146,7 +146,7 @@ class CtrPaddleTrainer(Trainer):
model = self._exector_context[executor['name']]['model']
program = model._build_param['model']['train_program']
if not executor['is_update_sparse']:
program._fleet_opt["program_configs"][str(id(model.get_cost_op().block.program))]["push_sparse"] = []
program._fleet_opt["program_configs"][str(id(model.get_avg_cost().block.program))]["push_sparse"] = []
if 'train_thread_num' not in executor:
executor['train_thread_num'] = self.global_config['train_thread_num']
with fluid.scope_guard(scope):
......
......@@ -18,9 +18,9 @@ Training use fluid with one node only.
from __future__ import print_function
import datetime
import os
import time
import datetime
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
......@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
class ClusterTrainer(TranspileTrainer):
class OnlineLearningTrainer(TranspileTrainer):
def processor_register(self):
role = PaddleCloudRoleMaker()
fleet.init(role)
......@@ -78,7 +78,7 @@ class ClusterTrainer(TranspileTrainer):
optimizer = self.model.optimizer()
strategy = self.build_strategy()
optimizer = fleet.distributed_optimizer(optimizer, strategy)
optimizer.minimize(self.model.get_cost_op())
optimizer.minimize(self.model.get_avg_cost())
if fleet.is_server():
context['status'] = 'server_pass'
......
......@@ -17,14 +17,14 @@ Training use fluid with one node only.
"""
from __future__ import print_function
import logging
import time
import logging
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
......@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer):
self.regist_context_processor('uninit', self.instance)
self.regist_context_processor('init_pass', self.init)
self.regist_context_processor('startup_pass', self.startup)
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None, "train.reader") != "DataLoader":
if envs.get_platform() == "LINUX" and envs.get_global_env("dataset_class", None,
"train.reader") != "DataLoader":
self.regist_context_processor('train_pass', self.dataset_train)
else:
self.regist_context_processor('train_pass', self.dataloader_train)
......@@ -47,7 +48,7 @@ class SingleTrainer(TranspileTrainer):
def init(self, context):
self.model.train_net()
optimizer = self.model.optimizer()
optimizer.minimize((self.model.get_cost_op()))
optimizer.minimize((self.model.get_avg_cost()))
self.fetch_vars = []
self.fetch_alias = []
......@@ -74,7 +75,7 @@ class SingleTrainer(TranspileTrainer):
program = fluid.compiler.CompiledProgram(
fluid.default_main_program()).with_data_parallel(
loss_name=self.model.get_cost_op().name)
loss_name=self.model.get_avg_cost().name)
metrics_varnames = []
metrics_format = []
......@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer):
fetch_info=self.fetch_alias,
print_period=self.fetch_period)
end_time = time.time()
times = end_time-begin_time
print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins/times))
times = end_time - begin_time
print("epoch {} using time {}, speed {:.2f} lines/s".format(i, times, ins / times))
self.save(i, "train", is_fleet=False)
context['status'] = 'infer_pass'
......
......@@ -17,17 +17,16 @@ Training use fluid with one node only.
"""
from __future__ import print_function
import logging
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import fleet
from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler.distributed_strategy import StrategyFactory
from paddle.fluid.incubate.fleet.base.role_maker import PaddleCloudRoleMaker
from paddlerec.core.utils import envs
from paddlerec.core.trainers.cluster_trainer import ClusterTrainer
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
......
......@@ -18,12 +18,11 @@ Training use fluid with one node only.
from __future__ import print_function
import logging
import paddle.fluid as fluid
from paddlerec.core.trainers.transpiler_trainer import TranspileTrainer
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.trainers.single_trainer import SingleTrainer
from paddlerec.core.utils import envs
import numpy as np
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
......
......@@ -147,8 +147,8 @@ class TranspileTrainer(Trainer):
if not need_save(epoch_id, save_interval, False):
return
# print("save inference model is not supported now.")
# return
# print("save inference model is not supported now.")
# return
feed_varnames = envs.get_global_env(
"save.inference.feed_varnames", None, namespace)
......@@ -248,7 +248,7 @@ class TranspileTrainer(Trainer):
'evaluate_model_path', "", namespace='evaluate'))]
is_return_numpy = envs.get_global_env(
'is_return_numpy', True, namespace='evaluate')
'is_return_numpy', True, namespace='evaluate')
for (epoch, model_dir) in model_list:
print("Begin to infer No.{} model, model_dir: {}".format(
......
......@@ -14,7 +14,6 @@
from __future__ import print_function
import os
import sys
from paddlerec.core.utils.envs import lazy_instance_by_fliename
from paddlerec.core.utils.envs import get_global_env
......
......@@ -13,8 +13,8 @@
# limitations under the License.
import abc
import time
import datetime
import time
import paddle.fluid as fluid
......@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs
from paddlerec.core.utils import util as util
class Dataset(object):
class DatasetHolder(object):
"""
Dataset Base
"""
......@@ -62,7 +62,7 @@ class Dataset(object):
pass
class TimeSplitDataset(Dataset):
class TimeSplitDatasetHolder(DatasetHolder):
"""
Dataset with time split dir. root_path/$DAY/$HOUR
"""
......@@ -142,16 +142,6 @@ class TimeSplitDataset(Dataset):
data_time = data_time + datetime.timedelta(minutes=self._split_interval)
return data_file_list
class FluidTimeSplitDataset(TimeSplitDataset):
"""
A Dataset with time split for PaddleFluid
"""
def __init__(self, config):
""" """
TimeSplitDataset.__init__(self, config)
def _alloc_dataset(self, file_list):
""" """
dataset = fluid.DatasetFactory().create_dataset(self._config['dataset_type'])
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import sys
......
......@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from contextlib import closing
import copy
import sys
import os
import socket
from contextlib import closing
import sys
global_envs = {}
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import os
from paddle.fluid.incubate.fleet.utils.hdfs import HDFSClient
......@@ -28,12 +29,12 @@ class LocalFSClient(object):
"""
Util for local disk file_system io
"""
def __init__(self):
"""R
"""
pass
def write(self, content, path, mode):
"""
write to file
......@@ -43,7 +44,7 @@ class LocalFSClient(object):
mode(string): w/a w:clear_write a:append_write
"""
temp_dir = os.path.dirname(path)
if not os.path.exists(temp_dir):
if not os.path.exists(temp_dir):
os.makedirs(temp_dir)
f = open(path, mode)
f.write(content)
......@@ -75,7 +76,7 @@ class LocalFSClient(object):
"""R
"""
os.system("rm -rf " + path)
def is_exist(self, path):
"""R
"""
......@@ -94,13 +95,14 @@ class FileHandler(object):
"""
A Smart file handler. auto judge local/afs by path
"""
def __init__(self, config):
"""R
"""
if 'fs_name' in config:
hadoop_home="$HADOOP_HOME"
hadoop_home = "$HADOOP_HOME"
hdfs_configs = {
"hadoop.job.ugi": config['fs_ugi'],
"hadoop.job.ugi": config['fs_ugi'],
"fs.default.name": config['fs_name']
}
self._hdfs_client = HDFSClient(hadoop_home, hdfs_configs)
......@@ -131,7 +133,8 @@ class FileHandler(object):
if mode.find('a') >= 0:
org_content = self._hdfs_client.cat(dest_path)
content = content + org_content
self._local_fs_client.write(content, temp_local_file, mode) #fleet hdfs_client only support upload, so write tmp file
self._local_fs_client.write(content, temp_local_file,
mode) # fleet hdfs_client only support upload, so write tmp file
self._hdfs_client.delete(dest_path + ".tmp")
self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
self._hdfs_client.delete(dest_path + ".bak")
......@@ -139,7 +142,7 @@ class FileHandler(object):
self._hdfs_client.rename(dest_path + ".tmp", dest_path)
else:
self._local_fs_client.write(content, dest_path, mode)
def cat(self, path):
"""R
"""
......@@ -148,7 +151,7 @@ class FileHandler(object):
return hdfs_cat
else:
return self._local_fs_client.cat(path)
def ls(self, path):
"""R
"""
......@@ -160,7 +163,7 @@ class FileHandler(object):
files = self._local_fs_client.ls(path)
files = [path + '/' + fi for fi in files] # absulte path
return files
def cp(self, org_path, dest_path):
"""R
"""
......@@ -170,6 +173,6 @@ class FileHandler(object):
return self._local_fs_client.cp(org_path, dest_path)
if not org_is_afs and dest_is_afs:
return self._hdfs_client.upload(dest_path, org_path)
if org_is_afs and not dest_is_afs:
if org_is_afs and not dest_is_afs:
return self._hdfs_client.download(org_path, dest_path)
print("Not Suppor hdfs cp currently")
......@@ -12,16 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import yaml
class TableMeta(object):
"""
Simple ParamTable Meta, Contain table_id
"""
TableId = 1
@staticmethod
def alloc_new_table(table_id):
"""
......
......@@ -12,11 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import datetime
import os
import time
import datetime
from paddle import fluid
from paddlerec.core.utils import fs as fs
......
......@@ -153,7 +153,7 @@ class Model(object):
def infer_net(self):
pass
def get_cost_op(self):
def get_avg_cost(self):
return self._cost
```
......
......@@ -13,15 +13,9 @@
# limitations under the License.
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
class Model(ModelBase):
def __init__(self, config):
......@@ -36,7 +30,7 @@ class Model(ModelBase):
def train_net(self):
""" network definition """
data = fluid.data(name="input", shape=[None, self.max_len], dtype='int64')
label = fluid.data(name="label", shape=[None, 1], dtype='int64')
seq_len = fluid.data(name="seq_len", shape=[None], dtype='int64')
......@@ -60,12 +54,12 @@ class Model(ModelBase):
prediction = fluid.layers.fc(input=[fc_1], size=self.class_dim, act="softmax")
cost = fluid.layers.cross_entropy(input=prediction, label=label)
avg_cost = fluid.layers.mean(x=cost)
acc = fluid.layers.accuracy(input=prediction, label=label)
acc = fluid.layers.accuracy(input=prediction, label=label)
self.cost = avg_cost
self._metrics["acc"] = acc
def get_cost_op(self):
def get_avg_cost(self):
return self.cost
def get_metrics(self):
......
......@@ -12,31 +12,22 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import sys
import collections
import os
import six
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
pass
def _process_line(self, l):
def _process_line(self, l):
l = l.strip().split(" ")
data = l[0:10]
seq_len = l[10:11]
label = l[11:]
return data, label, seq_len
return data, label, seq_len
def generate_sample(self, line):
def data_iter():
......@@ -47,6 +38,7 @@ class TrainReader(Reader):
data = [int(i) for i in data]
label = [int(i) for i in label]
seq_len = [int(i) for i in seq_len]
print >>sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
print >> sys.stderr, str([('data', data), ('label', label), ('seq_len', seq_len)])
yield [('data', data), ('label', label), ('seq_len', seq_len)]
return data_iter
......@@ -12,30 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid as fluid
import paddle.fluid.layers.nn as nn
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
from paddlerec.core.model import Model as ModelBase
from paddlerec.core.utils import envs
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.cost = None
self.metrics = {}
self.vocab_text_size = 11447#envs.get_global_env("vocab_text_size", None, self._namespace)
self.vocab_tag_size = 4#envs.get_global_env("vocab_tag_size", None, self._namespace)
self.emb_dim = 10#envs.get_global_env("emb_dim", None, self._namespace)
self.hid_dim = 1000#envs.get_global_env("hid_dim", None, self._namespace)
self.win_size = 5#envs.get_global_env("win_size", None, self._namespace)
self.margin = 0.1#envs.get_global_env("margin", None, self._namespace)
self.neg_size = 3#envs.get_global_env("neg_size", None, self._namespace)
print self.emb_dim
self.vocab_text_size = envs.get_global_env("vocab_text_size", None, self._namespace)
self.vocab_tag_size = envs.get_global_env("vocab_tag_size", None, self._namespace)
self.emb_dim = envs.get_global_env("emb_dim", None, self._namespace)
self.hid_dim = envs.get_global_env("hid_dim", None, self._namespace)
self.win_size = envs.get_global_env("win_size", None, self._namespace)
self.margin = envs.get_global_env("margin", None, self._namespace)
self.neg_size = envs.get_global_env("neg_size", None, self._namespace)
def train_net(self):
""" network definition """
......@@ -92,18 +89,16 @@ class Model(ModelBase):
self.metrics["correct"] = correct
self.metrics["cos_pos"] = cos_pos
def get_cost_op(self):
def get_avg_cost(self):
return self.cost
def get_metrics(self):
return self.metrics
def optimizer(self):
learning_rate = 0.01#envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
learning_rate = envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
sgd_optimizer = fluid.optimizer.Adagrad(learning_rate=learning_rate)
#sgd_optimizer.minimize(avg_cost)
return sgd_optimizer
def infer_net(self, parameter_list):
self.train_net()
......@@ -12,26 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import re
import sys
import collections
import os
import six
import time
import numpy as np
import paddle.fluid as fluid
import paddle
import csv
import io
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
pass
def _process_line(self, l):
def _process_line(self, l):
tag_size = 4
neg_size = 3
l = l.strip().split(",")
......@@ -54,10 +47,7 @@ class TrainReader(Reader):
neg_index = rand_i
neg_tag.append(neg_index)
sum_n += 1
# if n > 0 and len(text) > n:
# #yield None
# return None, None, None
return text, pos_tag, neg_tag
return text, pos_tag, neg_tag
def generate_sample(self, line):
def data_iter():
......@@ -66,4 +56,5 @@ class TrainReader(Reader):
yield None
return
yield [('text', text), ('pos_tag', pos_tag), ('neg_tag', neg_tag)]
return data_iter
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
......@@ -25,11 +24,12 @@ class Model(ModelBase):
def input(self):
TRIGRAM_D = envs.get_global_env("hyper_parameters.TRIGRAM_D", None, self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None, self._namespace)
Neg = envs.get_global_env("hyper_parameters.NEG", None, self._namespace)
self.query = fluid.data(name="query", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i in range(Neg)]
self.doc_negs = [fluid.data(name="doc_neg_" + str(i), shape=[-1, TRIGRAM_D], dtype="float32", lod_level=0) for i
in range(Neg)]
self._data_var.append(self.query)
self._data_var.append(self.doc_pos)
for input in self.doc_negs:
......@@ -38,40 +38,40 @@ class Model(ModelBase):
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
def net(self, is_infer=False):
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
hidden_layers = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
hidden_acts = envs.get_global_env("hyper_parameters.fc_acts", None, self._namespace)
def fc(data, hidden_layers, hidden_acts, names):
fc_inputs = [data]
for i in range(len(hidden_layers)):
xavier=fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
out = fluid.layers.fc(input=fc_inputs[-1],
size=hidden_layers[i],
act=hidden_acts[i],
param_attr=xavier,
bias_attr=xavier,
name=names[i])
fc_inputs.append(out)
return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts, ['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
for i in range(len(hidden_layers)):
xavier = fluid.initializer.Xavier(uniform=True, fan_in=fc_inputs[-1].shape[1], fan_out=hidden_layers[i])
out = fluid.layers.fc(input=fc_inputs[-1],
size=hidden_layers[i],
act=hidden_acts[i],
param_attr=xavier,
bias_attr=xavier,
name=names[i])
fc_inputs.append(out)
return fc_inputs[-1]
query_fc = fc(self.query, hidden_layers, hidden_acts, ['query_l1', 'query_l2', 'query_l3'])
doc_pos_fc = fc(self.doc_pos, hidden_layers, hidden_acts, ['doc_pos_l1', 'doc_pos_l2', 'doc_pos_l3'])
self.R_Q_D_p = fluid.layers.cos_sim(query_fc, doc_pos_fc)
if is_infer:
return
R_Q_D_ns = []
for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts, ['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
for i, doc_neg in enumerate(self.doc_negs):
doc_neg_fc_i = fc(doc_neg, hidden_layers, hidden_acts,
['doc_neg_l1_' + str(i), 'doc_neg_l2_' + str(i), 'doc_neg_l3_' + str(i)])
R_Q_D_ns.append(fluid.layers.cos_sim(query_fc, doc_neg_fc_i))
concat_Rs = fluid.layers.concat(input=[self.R_Q_D_p] + R_Q_D_ns, axis=-1)
prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(prob, axes=[0,1], starts=[0,0], ends=[4, 1])
prob = fluid.layers.softmax(concat_Rs, axis=1)
hit_prob = fluid.layers.slice(prob, axes=[0, 1], starts=[0, 0], ends=[4, 1])
loss = -fluid.layers.reduce_sum(fluid.layers.log(hit_prob))
self.avg_cost = fluid.layers.mean(x=loss)
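Note: in equation form, the block above is the DSSM softmax loss over one positive document and `Neg` negatives built from raw cosine similarities (the batch mean is taken at the end):

$$ P(D^{+}\mid Q)=\frac{\exp\big(\cos(Q,D^{+})\big)}{\exp\big(\cos(Q,D^{+})\big)+\sum_{j=1}^{Neg}\exp\big(\cos(Q,D^{-}_{j})\big)},\qquad \mathcal{L}=-\log P(D^{+}\mid Q) $$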
......@@ -101,10 +101,10 @@ class Model(ModelBase):
self.doc_pos = fluid.data(name="doc_pos", shape=[-1, TRIGRAM_D], dtype='float32', lod_level=0)
self._infer_data_var = [self.query, self.doc_pos]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
self.infer_input()
self.infer_input()
self.net(is_infer=True)
self.infer_results()
self.infer_results()
......@@ -14,7 +14,6 @@
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
......
......@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
......@@ -37,7 +37,7 @@ class TrainReader(Reader):
neg_docs = []
for i in range(len(features) - 2):
feature_names.append('doc_neg_' + str(i))
neg_docs.append(map(float, features[i+2].split(',')))
neg_docs.append(map(float, features[i + 2].split(',')))
yield zip(feature_names, [query] + [pos_doc] + neg_docs)
......
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "begin to prepare data"
......
......@@ -11,18 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
def init(self):
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.all_slots = []
for i in range(self.query_slots):
......@@ -52,6 +49,7 @@ class EvaluateReader(Reader):
if visit:
self._all_slots_dict[slot][0] = False
else:
output[index][1].append(padding)
output[index][1].append(padding)
yield output
return data_iter
......@@ -14,10 +14,12 @@
import random
class Dataset:
def __init__(self):
pass
class SyntheticDataset(Dataset):
def __init__(self, sparse_feature_dim, query_slot_num, title_slot_num, dataset_size=10000):
# ids are randomly generated
......@@ -39,7 +41,7 @@ class SyntheticDataset(Dataset):
for i in range(self.query_slot_num):
qslot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim)
qslot = [str(fea) + ':' + str(i) for fea in qslot]
qslot = [str(fea) + ':' + str(i) for fea in qslot]
query_slots += qslot
for i in range(self.title_slot_num):
pt_slot = generate_ids(self.ids_per_slot,
......@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset):
for i in range(self.title_slot_num):
nt_slot = generate_ids(self.ids_per_slot,
self.sparse_feature_dim)
nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in nt_slot]
nt_slot = [str(fea) + ':' + str(i + self.query_slot_num + self.title_slot_num) for fea in
nt_slot]
neg_title_slots += nt_slot
yield query_slots + pos_title_slots + neg_title_slots
else:
......@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset):
def test(self):
return self._reader_creator(False)
if __name__ == '__main__':
sparse_feature_dim = 1000001
query_slots = 1
......@@ -75,7 +79,7 @@ if __name__ == '__main__':
dataset = SyntheticDataset(sparse_feature_dim, query_slots, title_slots, dataset_size)
train_reader = dataset.train()
test_reader = dataset.test()
with open("data/train/train.txt", 'w') as fout:
for data in train_reader():
fout.write(' '.join(data))
......
......@@ -12,16 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.control_flow as cf
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class BowEncoder(object):
""" bow-encoder """
......@@ -97,13 +95,14 @@ class SimpleEncoderFactory(object):
rnn_encode = GrnnEncoder(hidden_size=enc_hid_size)
return rnn_encode
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.init_config()
def init_config(self):
self._fetch_interval = 1
self._fetch_interval = 1
query_encoder = envs.get_global_env("hyper_parameters.query_encoder", None, self._namespace)
title_encoder = envs.get_global_env("hyper_parameters.title_encoder", None, self._namespace)
query_encode_dim = envs.get_global_env("hyper_parameters.query_encode_dim", None, self._namespace)
......@@ -115,19 +114,19 @@ class Model(ModelBase):
factory.create(query_encoder, query_encode_dim)
for i in range(query_slots)
]
self.title_encoders = [
self.title_encoders = [
factory.create(title_encoder, title_encode_dim)
for i in range(title_slots)
]
self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim", None, self._namespace)
self.emb_shape = [self.emb_size, self.emb_dim]
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
self.margin = 0.1
self.emb_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
self.emb_dim = envs.get_global_env("hyper_parameters.embedding_dim", None, self._namespace)
self.emb_shape = [self.emb_size, self.emb_dim]
self.hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
self.margin = 0.1
def input(self, is_train=True):
self.q_slots = [
self.q_slots = [
fluid.data(
name="%d" % i, shape=[None, 1], lod_level=1, dtype='int64')
for i in range(len(self.query_encoders))
......@@ -138,22 +137,23 @@ class Model(ModelBase):
for i in range(len(self.title_encoders))
]
if is_train == False:
return self.q_slots + self.pt_slots
if is_train == False:
return self.q_slots + self.pt_slots
self.nt_slots = [
fluid.data(
name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1, dtype='int64')
name="%d" % (i + len(self.query_encoders) + len(self.title_encoders)), shape=[None, 1], lod_level=1,
dtype='int64')
for i in range(len(self.title_encoders))
]
return self.q_slots + self.pt_slots + self.nt_slots
def train_input(self):
res = self.input()
self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator(
......@@ -161,15 +161,15 @@ class Model(ModelBase):
def get_acc(self, x, y):
less = tensor.cast(cf.less_than(x, y), dtype='float32')
label_ones = fluid.layers.fill_constant_batch_size_like(
label_ones = fluid.layers.fill_constant_batch_size_like(
input=x, dtype='float32', shape=[-1, 1], value=1.0)
correct = fluid.layers.reduce_sum(less)
total = fluid.layers.reduce_sum(label_ones)
total = fluid.layers.reduce_sum(label_ones)
acc = fluid.layers.elementwise_div(correct, total)
return acc
return acc
def net(self):
q_embs = [
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
for query in self.q_slots
......@@ -184,8 +184,8 @@ class Model(ModelBase):
input=title, size=self.emb_shape, param_attr="emb")
for title in self.nt_slots
]
# encode each embedding field with encoder
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
......@@ -201,7 +201,7 @@ class Model(ModelBase):
pt_concat = fluid.layers.concat(pt_encodes)
nt_concat = fluid.layers.concat(nt_encodes)
# projection of hidden layer
# projection of hidden layer
q_hid = fluid.layers.fc(q_concat,
size=self.hidden_size,
param_attr='q_fc.w',
......@@ -219,7 +219,7 @@ class Model(ModelBase):
cos_pos = fluid.layers.cos_sim(q_hid, pt_hid)
cos_neg = fluid.layers.cos_sim(q_hid, nt_hid)
# pairwise hinge_loss
# pairwise hinge_loss
loss_part1 = fluid.layers.elementwise_sub(
tensor.fill_constant_batch_size_like(
input=cos_pos,
......@@ -236,7 +236,7 @@ class Model(ModelBase):
loss_part2)
self.avg_cost = fluid.layers.mean(loss_part3)
self.acc = self.get_acc(cos_neg, cos_pos)
self.acc = self.get_acc(cos_neg, cos_pos)
def avg_loss(self):
self._cost = self.avg_cost
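Note: assuming the standard formulation that loss_part1/2/3 above assemble, the objective is the pairwise hinge loss on cosine similarities, with the margin fixed at 0.1 in init_config:

$$ \mathcal{L}=\operatorname{mean}\Big(\max\big(0,\; margin-\cos(q,t^{+})+\cos(q,t^{-})\big)\Big) $$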
......@@ -253,19 +253,19 @@ class Model(ModelBase):
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
return optimizer
optimizer = fluid.optimizer.Adam(learning_rate=learning_rate)
return optimizer
def infer_input(self):
res = self.input(is_train=False)
self._infer_data_var = res
self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
self.infer_input()
# lookup embedding for each slot
self.infer_input()
# lookup embedding for each slot
q_embs = [
fluid.embedding(
input=query, size=self.emb_shape, param_attr="emb")
......@@ -276,14 +276,14 @@ class Model(ModelBase):
input=title, size=self.emb_shape, param_attr="emb")
for title in self.pt_slots
]
# encode each embedding field with encoder
# encode each embedding field with encoder
q_encodes = [
self.query_encoders[i].forward(emb) for i, emb in enumerate(q_embs)
]
pt_encodes = [
self.title_encoders[i].forward(emb) for i, emb in enumerate(pt_embs)
]
# concat multi view for query, pos_title, neg_title
# concat multi view for query, pos_title, neg_title
q_concat = fluid.layers.concat(q_encodes)
pt_concat = fluid.layers.concat(pt_encodes)
# projection of hidden layer
......
......@@ -11,18 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.query_slots = envs.get_global_env("hyper_parameters.query_slots", None, "train.model")
self.title_slots = envs.get_global_env("hyper_parameters.title_slots", None, "train.model")
self.all_slots = []
for i in range(self.query_slots):
......@@ -55,6 +52,7 @@ class TrainReader(Reader):
if visit:
self._all_slots_dict[slot][0] = False
else:
output[index][1].append(padding)
output[index][1].append(padding)
yield output
return data_iter
......@@ -13,19 +13,19 @@
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class EvaluateReader(Reader):
def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129',
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i]
for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line):
"""
......@@ -39,25 +39,26 @@ class EvaluateReader(Reader):
features = line.strip().split(',')
ctr = int(features[1])
cvr = int(features[2])
padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict]
output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]:
field_id,feat_id = elem.strip().split(':')
field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict:
continue
self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1]
output[index][1].append(int(feat_id))
output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id]
visited, index = self.all_field_id_dict[field_id]
if visited:
self.all_field_id_dict[field_id][0] = False
else:
output[index][1].append(padding)
output[index][1].append(padding)
output.append(('ctr', [ctr]))
output.append(('cvr', [cvr]))
yield output
return reader
......@@ -11,21 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
from collections import defaultdict
import numpy as np
from paddlerec.core.reader import Reader
class TrainReader(Reader):
def init(self):
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128', '129',
all_field_id = ['101', '109_14', '110_14', '127_14', '150_14', '121', '122', '124', '125', '126', '127', '128',
'129',
'205', '206', '207', '210', '216', '508', '509', '702', '853', '301']
self.all_field_id_dict = defaultdict(int)
for i,field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False,i]
for i, field_id in enumerate(all_field_id):
self.all_field_id_dict[field_id] = [False, i]
def generate_sample(self, line):
"""
......@@ -37,30 +38,31 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format
"""
features = line.strip().split(',')
#ctr = list(map(int, features[1]))
#cvr = list(map(int, features[2]))
# ctr = list(map(int, features[1]))
# cvr = list(map(int, features[2]))
ctr = int(features[1])
cvr = int(features[2])
padding = 0
output = [(field_id,[]) for field_id in self.all_field_id_dict]
output = [(field_id, []) for field_id in self.all_field_id_dict]
for elem in features[4:]:
field_id,feat_id = elem.strip().split(':')
field_id, feat_id = elem.strip().split(':')
if field_id not in self.all_field_id_dict:
continue
self.all_field_id_dict[field_id][0] = True
index = self.all_field_id_dict[field_id][1]
#feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id))
# feat_id = list(map(int, feat_id))
output[index][1].append(int(feat_id))
for field_id in self.all_field_id_dict:
visited,index = self.all_field_id_dict[field_id]
visited, index = self.all_field_id_dict[field_id]
if visited:
self.all_field_id_dict[field_id][0] = False
else:
output[index][1].append(padding)
output[index][1].append(padding)
output.append(('ctr', [ctr]))
output.append(('cvr', [cvr]))
yield output
return reader
......@@ -12,83 +12,84 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import numpy as np
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def fc(self,tag, data, out_dim, active='prelu'):
def fc(self, tag, data, out_dim, active='prelu'):
init_stddev = 1.0
scales = 1.0 / np.sqrt(data.shape[1])
scales = 1.0 / np.sqrt(data.shape[1])
p_attr = fluid.param_attr.ParamAttr(name='%s_weight' % tag,
initializer=fluid.initializer.NormalInitializer(loc=0.0, scale=init_stddev * scales))
initializer=fluid.initializer.NormalInitializer(loc=0.0,
scale=init_stddev * scales))
b_attr = fluid.ParamAttr(name='%s_bias' % tag, initializer=fluid.initializer.Constant(0.1))
out = fluid.layers.fc(input=data,
size=out_dim,
act=active,
param_attr=p_attr,
bias_attr =b_attr,
name=tag)
size=out_dim,
act=active,
param_attr=p_attr,
bias_attr=b_attr,
name=tag)
return out
def input_data(self):
sparse_input_ids = [
fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0,23)
fluid.data(name="field_" + str(i), shape=[-1, 1], dtype="int64", lod_level=1) for i in range(0, 23)
]
label_ctr = fluid.data(name="ctr", shape=[-1, 1], dtype="int64")
label_cvr = fluid.data(name="cvr", shape=[-1, 1], dtype="int64")
inputs = sparse_input_ids + [label_ctr] + [label_cvr]
self._data_var.extend(inputs)
return inputs
def net(self, inputs, is_infer=False):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
embed_size = envs.get_global_env("hyper_parameters.embed_size", None, self._namespace)
emb = []
for data in inputs[0:-2]:
feat_emb = fluid.embedding(input=data,
size=[vocab_size, embed_size],
param_attr=fluid.ParamAttr(name='dis_emb',
learning_rate=5,
initializer=fluid.initializer.Xavier(fan_in=embed_size,fan_out=embed_size)
),
is_sparse=True)
field_emb = fluid.layers.sequence_pool(input=feat_emb,pool_type='sum')
size=[vocab_size, embed_size],
param_attr=fluid.ParamAttr(name='dis_emb',
learning_rate=5,
initializer=fluid.initializer.Xavier(
fan_in=embed_size, fan_out=embed_size)
),
is_sparse=True)
field_emb = fluid.layers.sequence_pool(input=feat_emb, pool_type='sum')
emb.append(field_emb)
concat_emb = fluid.layers.concat(emb, axis=1)
# ctr
active = 'relu'
ctr_fc1 = self.fc('ctr_fc1', concat_emb, 200, active)
ctr_fc2 = self.fc('ctr_fc2', ctr_fc1, 80, active)
ctr_out = self.fc('ctr_out', ctr_fc2, 2, 'softmax')
# cvr
cvr_fc1 = self.fc('cvr_fc1', concat_emb, 200, active)
cvr_fc2 = self.fc('cvr_fc2', cvr_fc1, 80, active)
cvr_out = self.fc('cvr_out', cvr_fc2, 2,'softmax')
cvr_out = self.fc('cvr_out', cvr_fc2, 2, 'softmax')
ctr_clk = inputs[-2]
ctcvr_buy = inputs[-1]
ctr_prop_one = fluid.layers.slice(ctr_out, axes=[1], starts=[1], ends=[2])
cvr_prop_one = fluid.layers.slice(cvr_out, axes=[1], starts=[1], ends=[2])
ctcvr_prop_one = fluid.layers.elementwise_mul(ctr_prop_one, cvr_prop_one)
ctcvr_prop = fluid.layers.concat(input=[1-ctcvr_prop_one,ctcvr_prop_one], axis = 1)
ctcvr_prop = fluid.layers.concat(input=[1 - ctcvr_prop_one, ctcvr_prop_one], axis=1)
auc_ctr, batch_auc_ctr, auc_states_ctr = fluid.layers.auc(input=ctr_out, label=ctr_clk)
auc_ctcvr, batch_auc_ctcvr, auc_states_ctcvr = fluid.layers.auc(input=ctcvr_prop, label=ctcvr_buy)
......@@ -98,27 +99,23 @@ class Model(ModelBase):
self._infer_results["AUC_ctcvr"] = auc_ctcvr
return
loss_ctr = fluid.layers.cross_entropy(input=ctr_out, label=ctr_clk)
loss_ctcvr = fluid.layers.cross_entropy(input=ctcvr_prop, label=ctcvr_buy)
cost = loss_ctr + loss_ctcvr
avg_cost = fluid.layers.mean(cost)
self._cost = avg_cost
self._metrics["AUC_ctr"] = auc_ctr
self._metrics["BATCH_AUC_ctr"] = batch_auc_ctr
self._metrics["AUC_ctcvr"] = auc_ctcvr
self._metrics["BATCH_AUC_ctcvr"] = batch_auc_ctcvr
def train_net(self):
input_data = self.input_data()
self.net(input_data)
def infer_net(self):
self._infer_data_var = self.input_data()
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self.net(self._infer_data_var, is_infer=True)
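Note: the key ESMM identity implemented above is that the CVR tower is only ever consumed through its product with CTR, so both heads are supervised with impression-level labels:

$$ p_{CTCVR}=p_{CTR}\cdot p_{CVR},\qquad \mathcal{L}=\mathrm{CE}(p_{CTR},\,y_{click})+\mathrm{CE}(p_{CTCVR},\,y_{buy}) $$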
......@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader):
......
......@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader):
......@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0]
elif int(l[0]) == 1:
label_marital = [0, 1]
#label_income = np.array(label_income)
#label_marital = np.array(label_marital)
# label_income = np.array(label_income)
# label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital])
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
......@@ -37,22 +36,21 @@ class Model(ModelBase):
if is_infer:
self._infer_data_var = [input_data, label_income, label_marital]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self._data_var.extend([input_data, label_income, label_marital])
# f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
expert_outputs = []
for i in range(0, expert_num):
expert_output = fluid.layers.fc(input=input_data,
size=expert_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='expert_' + str(i))
size=expert_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='expert_' + str(i))
expert_outputs.append(expert_output)
expert_concat = fluid.layers.concat(expert_outputs, axis=1)
expert_concat = fluid.layers.reshape(expert_concat,[-1, expert_num, expert_size])
expert_concat = fluid.layers.reshape(expert_concat, [-1, expert_num, expert_size])
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
output_layers = []
for i in range(0, gate_num):
......@@ -62,52 +60,53 @@ class Model(ModelBase):
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='gate_' + str(i))
# f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
cur_gate_expert = fluid.layers.elementwise_mul(expert_concat, cur_gate, axis=0)
cur_gate_expert = fluid.layers.elementwise_mul(expert_concat, cur_gate, axis=0)
cur_gate_expert = fluid.layers.reduce_sum(cur_gate_expert, dim=1)
# Build tower layer
cur_tower = fluid.layers.fc(input=cur_gate_expert,
size=tower_size,
act='relu',
name='task_layer_' + str(i))
out = fluid.layers.fc(input=cur_tower,
size=2,
act='softmax',
name='out_' + str(i))
cur_tower = fluid.layers.fc(input=cur_gate_expert,
size=tower_size,
act='relu',
name='task_layer_' + str(i))
out = fluid.layers.fc(input=cur_tower,
size=2,
act='softmax',
name='out_' + str(i))
output_layers.append(out)
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer:
self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital
return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
avg_cost_income = fluid.layers.mean(x=cost_income)
avg_cost_marital = fluid.layers.mean(x=cost_marital)
cost = avg_cost_income + avg_cost_marital
cost = avg_cost_income + avg_cost_marital
self._cost = cost
self._metrics["AUC_income"] = auc_income
self._metrics["BATCH_AUC_income"] = batch_auc_1
self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self):
self.MMOE()
def infer_net(self):
self.MMOE(is_infer=True)
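Note: collecting the inline comments, the MMoE forward pass above computes, for each task k over n experts:

$$ f_i(x)=\mathrm{ReLU}(W_i x+b_i),\quad g^{k}(x)=\mathrm{softmax}(W_{gk}x+b_{gk}),\quad f^{k}(x)=\sum_{i=1}^{n} g^{k}(x)_i\, f_i(x),\quad y^{k}=\mathrm{tower}^{k}\big(f^{k}(x)\big) $$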
......@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class EvaluateReader(Reader):
......
......@@ -11,11 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
class TrainReader(Reader):
......@@ -44,8 +43,8 @@ class TrainReader(Reader):
label_marital = [1, 0]
elif int(l[0]) == 1:
label_marital = [0, 1]
#label_income = np.array(label_income)
#label_marital = np.array(label_marital)
# label_income = np.array(label_income)
# label_marital = np.array(label_marital)
feature_name = ["input", "label_income", "label_marital"]
yield zip(feature_name, [data] + [label_income] + [label_marital])
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
......@@ -33,65 +32,65 @@ class Model(ModelBase):
input_data = fluid.data(name="input", shape=[-1, feature_size], dtype="float32")
label_income = fluid.data(name="label_income", shape=[-1, 2], dtype="float32", lod_level=0)
label_marital = fluid.data(name="label_marital", shape=[-1, 2], dtype="float32", lod_level=0)
if is_infer:
self._infer_data_var = [input_data, label_income, label_marital]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
self._data_var.extend([input_data, label_income, label_marital])
bottom_output = fluid.layers.fc(input=input_data,
size=bottom_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='bottom_output')
size=bottom_size,
act='relu',
bias_attr=fluid.ParamAttr(learning_rate=1.0),
name='bottom_output')
# Build tower layer from bottom layer
output_layers = []
for index in range(tower_nums):
for index in range(tower_nums):
tower_layer = fluid.layers.fc(input=bottom_output,
size=tower_size,
act='relu',
name='task_layer_' + str(index))
size=tower_size,
act='relu',
name='task_layer_' + str(index))
output_layer = fluid.layers.fc(input=tower_layer,
size=2,
act='softmax',
name='output_layer_' + str(index))
size=2,
act='softmax',
name='output_layer_' + str(index))
output_layers.append(output_layer)
pred_income = fluid.layers.clip(output_layers[0], min=1e-15, max=1.0 - 1e-15)
pred_marital = fluid.layers.clip(output_layers[1], min=1e-15, max=1.0 - 1e-15)
label_income_1 = fluid.layers.slice(label_income, axes=[1], starts=[1], ends=[2])
label_marital_1 = fluid.layers.slice(label_marital, axes=[1], starts=[1], ends=[2])
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income, label=fluid.layers.cast(x=label_income_1, dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital, label=fluid.layers.cast(x=label_marital_1, dtype='int64'))
auc_income, batch_auc_1, auc_states_1 = fluid.layers.auc(input=pred_income,
label=fluid.layers.cast(x=label_income_1,
dtype='int64'))
auc_marital, batch_auc_2, auc_states_2 = fluid.layers.auc(input=pred_marital,
label=fluid.layers.cast(x=label_marital_1,
dtype='int64'))
if is_infer:
self._infer_results["AUC_income"] = auc_income
self._infer_results["AUC_marital"] = auc_marital
return
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income,soft_label = True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital,soft_label = True)
cost_income = fluid.layers.cross_entropy(input=pred_income, label=label_income, soft_label=True)
cost_marital = fluid.layers.cross_entropy(input=pred_marital, label=label_marital, soft_label=True)
cost = fluid.layers.elementwise_add(cost_income, cost_marital, axis=1)
avg_cost = fluid.layers.mean(x=cost)
avg_cost = fluid.layers.mean(x=cost)
self._cost = avg_cost
self._metrics["AUC_income"] = auc_income
self._metrics["BATCH_AUC_income"] = batch_auc_1
self._metrics["AUC_marital"] = auc_marital
self._metrics["BATCH_AUC_marital"] = batch_auc_2
def train_net(self):
self.model()
def infer_net(self):
self.model(is_infer=True)
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
......
......@@ -11,18 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import math
import sys
import os
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
import cPickle as pickle
except ImportError:
import pickle
from collections import Counter
import os
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
......@@ -45,7 +46,7 @@ class TrainReader(Reader):
self.label_feat_names = target + dense_feat_names + sparse_feat_names
self.cat_feat_idx_dict_list = [{} for _ in range(26)]
# TODO: set vocabulary dictionary
vocab_dir = envs.get_global_env("feat_dict_name", None, "train.reader")
for i in range(26):
......@@ -53,7 +54,7 @@ class TrainReader(Reader):
for line in open(
os.path.join(vocab_dir, 'C' + str(i + 1) + '.txt')):
self.cat_feat_idx_dict_list[i][line.strip()] = lookup_idx
lookup_idx += 1
lookup_idx += 1
def _process_line(self, line):
features = line.rstrip('\n').split('\t')
......@@ -71,20 +72,21 @@ class TrainReader(Reader):
if idx == 2 else math.log(1 + float(features[idx])))
for idx in self.cat_idx_:
if features[idx] == '' or features[
idx] not in self.cat_feat_idx_dict_list[idx - 14]:
idx] not in self.cat_feat_idx_dict_list[idx - 14]:
label_feat_list[idx].append(0)
else:
label_feat_list[idx].append(self.cat_feat_idx_dict_list[
idx - 14][features[idx]])
idx - 14][features[idx]])
label_feat_list[0].append(int(features[0]))
return label_feat_list
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def data_iter():
label_feat_list = self._process_line(line)
yield list(zip(self.label_feat_names, label_feat_list))
return data_iter
\ No newline at end of file
return data_iter
......@@ -12,17 +12,18 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import OrderedDict
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
from collections import OrderedDict
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def init_network(self):
self.cross_num = envs.get_global_env("hyper_parameters.cross_num", None, self._namespace)
self.dnn_hidden_units = envs.get_global_env("hyper_parameters.dnn_hidden_units", None, self._namespace)
......@@ -49,7 +50,7 @@ class Model(ModelBase):
self.net_input = None
self.loss = None
def _create_embedding_input(self, data_dict):
# sparse embedding
sparse_emb_dict = OrderedDict((name, fluid.embedding(
......@@ -77,7 +78,7 @@ class Model(ModelBase):
net_input = fluid.layers.concat([dense_input, sparse_input], axis=-1)
return net_input
def _deep_net(self, input, hidden_units, use_bn=False, is_test=False):
for units in hidden_units:
input = fluid.layers.fc(input=input, size=units)
......@@ -94,7 +95,7 @@ class Model(ModelBase):
[input_dim], dtype='float32', name=prefix + "_b")
xw = fluid.layers.reduce_sum(x * w, dim=1, keep_dim=True) # (N, 1)
return x0 * xw + b + x, w
def _cross_net(self, input, num_corss_layers):
x = x0 = input
l2_reg_cross_list = []
......@@ -105,10 +106,10 @@ class Model(ModelBase):
fluid.layers.concat(
l2_reg_cross_list, axis=-1))
return x, l2_reg_cross_loss
def _l2_loss(self, w):
return fluid.layers.reduce_sum(fluid.layers.square(w))
def train_net(self):
self.init_network()
self.target_input = fluid.data(
......@@ -117,14 +118,14 @@ class Model(ModelBase):
for feat_name in self.feat_dims_dict:
data_dict[feat_name] = fluid.data(
name=feat_name, shape=[None, 1], dtype='float32')
self.net_input = self._create_embedding_input(data_dict)
deep_out = self._deep_net(self.net_input, self.dnn_hidden_units, self.dnn_use_bn, False)
cross_out, l2_reg_cross_loss = self._cross_net(self.net_input,
self.cross_num)
self.cross_num)
last_out = fluid.layers.concat([deep_out, cross_out], axis=-1)
logit = fluid.layers.fc(last_out, 1)
......@@ -140,7 +141,6 @@ class Model(ModelBase):
input=prob_2d, label=label_int, slide_steps=0)
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
# logloss
logloss = fluid.layers.log_loss(self.prob, self.target_input)
......
......@@ -11,15 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
import cPickle as pickle
except ImportError:
import pickle
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
......@@ -35,7 +38,7 @@ class TrainReader(Reader):
self.categorical_range_ = range(14, 40)
# load preprocessed feature dict
self.feat_dict_name = envs.get_global_env("feat_dict_name", None, "train.reader")
self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb'))
self.feat_dict_ = pickle.load(open(self.feat_dict_name, 'rb'))
def _process_line(self, line):
features = line.rstrip('\n').split('\t')
......@@ -59,13 +62,14 @@ class TrainReader(Reader):
feat_value.append(1.0)
label = [int(features[0])]
return feat_idx, feat_value, label
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def data_iter():
feat_idx, feat_value, label = self._process_line(line)
yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label', label)]
return data_iter
\ No newline at end of file
return data_iter
......@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
......@@ -28,26 +29,27 @@ class Model(ModelBase):
is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
# ------------------------- network input --------------------------
num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64') # None * num_field(default:39)
raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field
raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field],
dtype='int64') # None * num_field(default:39)
raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32') # None * num_field
self.label = fluid.data(name='label', shape=[None, 1], dtype='float32') # None * 1
feat_idx = fluid.layers.reshape(raw_feat_idx,[-1, 1]) # (None * num_field) * 1
feat_idx = fluid.layers.reshape(raw_feat_idx, [-1, 1]) # (None * num_field) * 1
feat_value = fluid.layers.reshape(raw_feat_value, [-1, num_field, 1]) # None * num_field * 1
# ------------------------- set _data_var --------------------------
self._data_var.append(raw_feat_idx)
self._data_var.append(raw_feat_value)
self._data_var.append(self.label)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
#------------------------- first order term --------------------------
# ------------------------- first order term --------------------------
reg = envs.get_global_env("hyper_parameters.reg", 1e-4, self._namespace)
first_weights_re = fluid.embedding(
......@@ -65,7 +67,7 @@ class Model(ModelBase):
first_weights_re, shape=[-1, num_field, 1]) # None * num_field * 1
y_first_order = fluid.layers.reduce_sum((first_weights * feat_value), 1)
#------------------------- second order term --------------------------
# ------------------------- second order term --------------------------
feat_embeddings_re = fluid.embedding(
input=feat_idx,
......@@ -80,12 +82,12 @@ class Model(ModelBase):
feat_embeddings = fluid.layers.reshape(
feat_embeddings_re,
shape=[-1, num_field,
sparse_feature_dim]) # None * num_field * embedding_size
sparse_feature_dim]) # None * num_field * embedding_size
feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
# sum_square part
summed_features_emb = fluid.layers.reduce_sum(feat_embeddings,
1) # None * embedding_size
1) # None * embedding_size
summed_features_emb_square = fluid.layers.square(
summed_features_emb) # None * embedding_size
......@@ -99,13 +101,12 @@ class Model(ModelBase):
summed_features_emb_square - squared_sum_features_emb, 1,
keep_dim=True) # None * 1
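The sum_square / square_sum pair implements the usual FM trick for the pairwise-interaction term: (sum of embeddings) squared minus sum of squared embeddings, halved. A small numpy sketch of the same computation, assuming a [batch, num_field, embedding_size] tensor as above (the 1/2 factor comes from the FM identity):
import numpy as np
feat_embeddings = np.random.rand(2, 39, 8)                       # batch * num_field * embedding_size
sum_then_square = feat_embeddings.sum(axis=1) ** 2                # square of the field-wise sum
square_then_sum = (feat_embeddings ** 2).sum(axis=1)              # field-wise sum of squares
y_second_order = 0.5 * (sum_then_square - square_then_sum).sum(axis=1, keepdims=True)  # batch * 1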
#------------------------- DNN --------------------------
# ------------------------- DNN --------------------------
layer_sizes = envs.get_global_env("hyper_parameters.fc_sizes", None, self._namespace)
act = envs.get_global_env("hyper_parameters.act", None, self._namespace)
y_dnn = fluid.layers.reshape(feat_embeddings,
[-1, num_field * sparse_feature_dim])
[-1, num_field * sparse_feature_dim])
for s in layer_sizes:
y_dnn = fluid.layers.fc(
input=y_dnn,
......@@ -127,28 +128,28 @@ class Model(ModelBase):
bias_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_)))
#------------------------- DeepFM --------------------------
# ------------------------- DeepFM --------------------------
self.predict = fluid.layers.sigmoid(y_first_order + y_second_order + y_dnn)
def train_net(self):
self.deepfm_net()
#------------------------- Cost(logloss) --------------------------
# ------------------------- Cost(logloss) --------------------------
cost = fluid.layers.log_loss(input=self.predict, label=self.label)
avg_cost = fluid.layers.reduce_sum(cost)
self._cost = avg_cost
#------------------------- Metric(Auc) --------------------------
# ------------------------- Metric(Auc) --------------------------
predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
label_int = fluid.layers.cast(self.label, 'int64')
auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
label=label_int,
slide_steps=0)
label=label_int,
slide_steps=0)
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
......@@ -158,4 +159,4 @@ class Model(ModelBase):
return optimizer
def infer_net(self, parameter_list):
self.deepfm_net()
\ No newline at end of file
self.deepfm_net()
......@@ -13,7 +13,6 @@
# limitations under the License.
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
......
......@@ -13,24 +13,28 @@
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import numpy as np
import os
import random
try:
import cPickle as pickle
except ImportError:
import pickle
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.train_data_path = envs.get_global_env("train_data_path", None, "train.reader")
self.res = []
self.max_len = 0
data_file_list = os.listdir(self.train_data_path)
for i in range(0, len(data_file_list)):
for i in range(0, len(data_file_list)):
train_data_file = os.path.join(self.train_data_path, data_file_list[i])
with open(train_data_file, "r") as fin:
for line in fin:
......@@ -43,9 +47,6 @@ class TrainReader(Reader):
self.batch_size = envs.get_global_env("batch_size", 32, "train.reader")
self.group_size = self.batch_size * 20
def _process_line(self, line):
line = line.strip().split(';')
hist = line[0].split()
......@@ -54,22 +55,22 @@ class TrainReader(Reader):
cate = [int(i) for i in cate]
return [hist, cate, [int(line[2])], [int(line[3])], [float(line[4])]]
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def data_iter():
#feat_idx, feat_value, label = self._process_line(line)
# feat_idx, feat_value, label = self._process_line(line)
yield self._process_line(line)
return data_iter
def pad_batch_data(self, input, max_len):
res = np.array([x + [0] * (max_len - len(x)) for x in input])
res = res.astype("int64").reshape([-1, max_len])
return res
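A quick worked example of pad_batch_data, assuming the method above: pad_batch_data([[3, 5], [7]], max_len=3) returns an int64 array [[3, 5, 0], [7, 0, 0]] of shape [2, 3], i.e. every sequence is right-padded with zeros to the batch maximum.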
def make_data(self, b):
max_len = max(len(x[0]) for x in b)
item = self.pad_batch_data([x[0] for x in b], max_len)
......@@ -77,7 +78,7 @@ class TrainReader(Reader):
len_array = [len(x[0]) for x in b]
mask = np.array(
[[0] * x + [-1e9] * (max_len - x) for x in len_array]).reshape(
[-1, max_len, 1])
[-1, max_len, 1])
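The additive mask marks real positions with 0 and padded positions with -1e9, so that when it is added to attention logits before a softmax the padded slots receive near-zero weight. For example, len_array = [2] with max_len = 3 yields mask[0] = [[0], [0], [-1e9]].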
target_item_seq = np.array(
[[x[2]] * max_len for x in b]).astype("int64").reshape([-1, max_len])
target_cat_seq = np.array(
......@@ -89,7 +90,7 @@ class TrainReader(Reader):
target_item_seq[i], target_cat_seq[i]
])
return res
def batch_reader(self, reader, batch_size, group_size):
def batch_reader():
bg = []
......@@ -111,7 +112,7 @@ class TrainReader(Reader):
yield self.make_data(b)
return batch_reader
def base_read(self, file_dir):
res = []
for train_file in file_dir:
......@@ -122,10 +123,8 @@ class TrainReader(Reader):
cate = line[1].split()
res.append([hist, cate, line[2], line[3], float(line[4])])
return res
def generate_batch_from_trainfiles(self, files):
data_set = self.base_read(files)
random.shuffle(data_set)
return self.batch_reader(data_set, self.batch_size, self.batch_size * 20)
......@@ -13,6 +13,7 @@
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
......
......@@ -12,9 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
......@@ -22,32 +23,39 @@ from paddlerec.core.model import Model as ModelBase
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
def wide_part(self, data):
out = fluid.layers.fc(input=data,
size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(data.shape[1])),
regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=1e-4)),
act=None,
name='wide')
size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0,
scale=1.0 / math.sqrt(
data.shape[
1])),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)),
act=None,
name='wide')
return out
def fc(self, data, hidden_units, active, tag):
output = fluid.layers.fc(input=data,
size=hidden_units,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0 / math.sqrt(data.shape[1]))),
act=active,
name=tag)
size=hidden_units,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0,
scale=1.0 / math.sqrt(
data.shape[
1]))),
act=active,
name=tag)
return output
def deep_part(self, data, hidden1_units, hidden2_units, hidden3_units):
l1 = self.fc(data, hidden1_units, 'relu', 'l1')
l2 = self.fc(l1, hidden2_units, 'relu', 'l2')
l3 = self.fc(l2, hidden3_units, 'relu', 'l3')
return l3
def train_net(self):
wide_input = fluid.data(name='wide_input', shape=[None, 8], dtype='float32')
deep_input = fluid.data(name='deep_input', shape=[None, 58], dtype='float32')
......@@ -61,31 +69,33 @@ class Model(ModelBase):
hidden3_units = envs.get_global_env("hyper_parameters.hidden3_units", 25, self._namespace)
wide_output = self.wide_part(wide_input)
deep_output = self.deep_part(deep_input, hidden1_units, hidden2_units, hidden3_units)
wide_model = fluid.layers.fc(input=wide_output,
size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None,
name='w_wide')
size=1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None,
name='w_wide')
deep_model = fluid.layers.fc(input=deep_output,
size=1,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None,
name='w_deep')
size=1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.TruncatedNormal(loc=0.0, scale=1.0)),
act=None,
name='w_deep')
prediction = fluid.layers.elementwise_add(wide_model, deep_model)
pred = fluid.layers.sigmoid(fluid.layers.clip(prediction, min=-15.0, max=15.0), name="prediction")
num_seqs = fluid.layers.create_tensor(dtype='int64')
acc = fluid.layers.accuracy(input=pred, label=fluid.layers.cast(x=label, dtype='int64'), total=num_seqs)
auc_var, batch_auc, auc_states = fluid.layers.auc(input=pred, label=fluid.layers.cast(x=label, dtype='int64'))
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc
self._metrics["ACC"] = acc
cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=prediction, label=label)
cost = fluid.layers.sigmoid_cross_entropy_with_logits(x=prediction, label=label)
avg_cost = fluid.layers.mean(cost)
self._cost = avg_cost
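Note that the loss is computed on the raw logit (sigmoid_cross_entropy_with_logits) while the metric path clips the logit to [-15, 15] before the sigmoid. A tiny numpy sketch of that clipped sigmoid, illustrative only:
import numpy as np
def clipped_sigmoid(logits, bound=15.0):
    # same idea as fluid.layers.clip followed by sigmoid above: keep exp() in a numerically safe range
    z = np.clip(logits, -bound, bound)
    return 1.0 / (1.0 + np.exp(-z))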
......@@ -95,4 +105,4 @@ class Model(ModelBase):
return optimizer
def infer_net(self, parameter_list):
self.deepfm_net()
\ No newline at end of file
self.deepfm_net()
......@@ -11,15 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
import cPickle as pickle
except ImportError:
import pickle
from paddlerec.core.reader import Reader
class TrainReader(Reader):
def init(self):
pass
......@@ -28,16 +30,17 @@ class TrainReader(Reader):
line = line.strip().split(',')
features = list(map(float, line))
wide_feat = features[0:8]
deep_feat = features[8:58+8]
deep_feat = features[8:58 + 8]
label = features[-1]
return wide_feat, deep_feat, [label]
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
"""
def data_iter():
wide_feat, deep_feat, label = self._process_line(line)
yield [('wide_input', wide_feat), ('deep_input', deep_feat), ('label', label)]
return data_iter
\ No newline at end of file
return data_iter
......@@ -11,19 +11,21 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
try:
import cPickle as pickle
except ImportError:
import pickle
class TrainReader(Reader):
from paddlerec.core.reader import Reader
class TrainReader(Reader):
def init(self):
pass
def _process_line(self, line):
features = line.strip('\n').split('\t')
feat_idx = []
......@@ -33,11 +35,11 @@ class TrainReader(Reader):
feat_value.append(1.0)
label = [int(features[0])]
return feat_idx, feat_value, label
def generate_sample(self, line):
def data_iter():
feat_idx, feat_value, label = self._process_line(line)
yield [('feat_idx', feat_idx), ('feat_value', feat_value), ('label',
label)]
return data_iter
\ No newline at end of file
return data_iter
......@@ -13,7 +13,6 @@
# limitations under the License.
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
......@@ -27,13 +26,13 @@ class Model(ModelBase):
init_value_ = 0.1
initer = fluid.initializer.TruncatedNormalInitializer(
loc=0.0, scale=init_value_)
is_distributed = True if envs.get_trainer() == "CtrTrainer" else False
sparse_feature_number = envs.get_global_env("hyper_parameters.sparse_feature_number", None, self._namespace)
sparse_feature_dim = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
# ------------------------- network input --------------------------
num_field = envs.get_global_env("hyper_parameters.num_field", None, self._namespace)
raw_feat_idx = fluid.data(name='feat_idx', shape=[None, num_field], dtype='int64')
raw_feat_value = fluid.data(name='feat_value', shape=[None, num_field], dtype='float32')
......@@ -52,16 +51,16 @@ class Model(ModelBase):
feat_embeddings,
[-1, num_field, sparse_feature_dim]) # None * num_field * embedding_size
feat_embeddings = feat_embeddings * feat_value # None * num_field * embedding_size
# ------------------------- set _data_var --------------------------
self._data_var.append(raw_feat_idx)
self._data_var.append(raw_feat_value)
self._data_var.append(self.label)
if self._platform != "LINUX":
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=64, use_double_buffer=False, iterable=False)
# -------------------- linear --------------------
weights_linear = fluid.embedding(
......@@ -79,7 +78,7 @@ class Model(ModelBase):
default_initializer=fluid.initializer.ConstantInitializer(value=0))
y_linear = fluid.layers.reduce_sum(
(weights_linear * feat_value), 1) + b_linear
# -------------------- CIN --------------------
layer_sizes_cin = envs.get_global_env("hyper_parameters.layer_sizes_cin", None, self._namespace)
......@@ -90,7 +89,7 @@ class Model(ModelBase):
X_0 = fluid.layers.reshape(
fluid.layers.transpose(Xs[0], [0, 2, 1]),
[-1, sparse_feature_dim, num_field,
1]) # None, embedding_size, num_field, 1
1]) # None, embedding_size, num_field, 1
X_k = fluid.layers.reshape(
fluid.layers.transpose(Xs[-1], [0, 2, 1]),
[-1, sparse_feature_dim, 1, last_s]) # None, embedding_size, 1, last_s
......@@ -136,7 +135,7 @@ class Model(ModelBase):
layer_sizes_dnn = envs.get_global_env("hyper_parameters.layer_sizes_dnn", None, self._namespace)
act = envs.get_global_env("hyper_parameters.act", None, self._namespace)
y_dnn = fluid.layers.reshape(feat_embeddings,
[-1, num_field * sparse_feature_dim])
[-1, num_field * sparse_feature_dim])
for s in layer_sizes_dnn:
y_dnn = fluid.layers.fc(input=y_dnn,
size=s,
......@@ -152,7 +151,7 @@ class Model(ModelBase):
# ------------------- xDeepFM ------------------
self.predict = fluid.layers.sigmoid(y_linear + y_cin + y_dnn)
def train_net(self):
self.xdeepfm_net()
......@@ -164,15 +163,15 @@ class Model(ModelBase):
predict_2d = fluid.layers.concat([1 - self.predict, self.predict], 1)
label_int = fluid.layers.cast(self.label, 'int64')
auc_var, batch_auc_var, _ = fluid.layers.auc(input=predict_2d,
label=label_int,
slide_steps=0)
label=label_int,
slide_steps=0)
self._metrics["AUC"] = auc_var
self._metrics["BATCH_AUC"] = batch_auc_var
def optimizer(self):
learning_rate = envs.get_global_env("hyper_parameters.learning_rate", None, self._namespace)
optimizer = fluid.optimizer.Adam(learning_rate, lazy_mode=True)
return optimizer
def infer_net(self, parameter_list):
self.xdeepfm_net()
\ No newline at end of file
self.xdeepfm_net()
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
echo "begin to download data"
......
......@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
......@@ -22,17 +24,17 @@ from paddlerec.core.utils import envs
class EvaluateReader(Reader):
def init(self):
self.batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")
self.input = []
self.length = None
def base_read(self, files):
res = []
for f in files:
with open(f, "r") as fin:
with open(f, "r") as fin:
for line in fin:
line = line.strip().split('\t')
res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
line = line.strip().split('\t')
res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
return res
def make_data(self, cur_batch, batch_size):
......@@ -120,10 +122,11 @@ class EvaluateReader(Reader):
else:
# Due to fixed batch_size, discard the remaining ins
return
#cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size)
# cur_batch = remain_data[i:]
# yield self.make_data(cur_batch, group_remain % batch_size)
return _reader
def generate_batch_from_trainfiles(self, files):
self.input = self.base_read(files)
self.length = len(self.input)
......@@ -132,4 +135,5 @@ class EvaluateReader(Reader):
def generate_sample(self, line):
def data_iter():
yield []
return data_iter
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import math
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
......@@ -25,19 +26,19 @@ class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
self.init_config()
def init_config(self):
self._fetch_interval = 1
self.items_num, self.ins_num = self.config_read(envs.get_global_env("hyper_parameters.config_path", None, self._namespace))
self.items_num, self.ins_num = self.config_read(
envs.get_global_env("hyper_parameters.config_path", None, self._namespace))
self.train_batch_size = envs.get_global_env("batch_size", None, "train.reader")
self.evaluate_batch_size = envs.get_global_env("batch_size", None, "evaluate.reader")
self.hidden_size = envs.get_global_env("hyper_parameters.sparse_feature_dim", None, self._namespace)
self.step = envs.get_global_env("hyper_parameters.gnn_propogation_steps", None, self._namespace)
def config_read(self, config_path=None):
if config_path is None:
raise ValueError("please set train.model.hyper_parameters.config_path at first")
if config_path is None:
raise ValueError("please set train.model.hyper_parameters.config_path at first")
with open(config_path, "r") as fin:
item_nums = int(fin.readline().strip())
ins_nums = int(fin.readline().strip())
......@@ -47,49 +48,49 @@ class Model(ModelBase):
self.items = fluid.data(
name="items",
shape=[bs, -1],
dtype="int64") #[batch_size, uniq_max]
dtype="int64") # [batch_size, uniq_max]
self.seq_index = fluid.data(
name="seq_index",
shape=[bs, -1, 2],
dtype="int32") #[batch_size, seq_max, 2]
dtype="int32") # [batch_size, seq_max, 2]
self.last_index = fluid.data(
name="last_index",
shape=[bs, 2],
dtype="int32") #[batch_size, 2]
dtype="int32") # [batch_size, 2]
self.adj_in = fluid.data(
name="adj_in",
shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max]
dtype="float32") # [batch_size, seq_max, seq_max]
self.adj_out = fluid.data(
name="adj_out",
shape=[bs, -1, -1],
dtype="float32") #[batch_size, seq_max, seq_max]
dtype="float32") # [batch_size, seq_max, seq_max]
self.mask = fluid.data(
name="mask",
shape=[bs, -1, 1],
dtype="float32") #[batch_size, seq_max, 1]
dtype="float32") # [batch_size, seq_max, 1]
self.label = fluid.data(
name="label",
shape=[bs, 1],
dtype="int64") #[batch_size, 1]
dtype="int64") # [batch_size, 1]
res = [self.items, self.seq_index, self.last_index, self.adj_in, self.adj_out, self.mask, self.label]
return res
def train_input(self):
res = self.input(self.train_batch_size)
self._data_var = res
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
use_dataloader = envs.get_global_env("hyper_parameters.use_DataLoader", False, self._namespace)
if self._platform != "LINUX" or use_dataloader:
self._data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._data_var, capacity=256, use_double_buffer=False, iterable=False)
def net(self, items_num, hidden_size, step, bs):
stdv = 1.0 / math.sqrt(hidden_size)
stdv = 1.0 / math.sqrt(hidden_size)
def embedding_layer(input, table_name, emb_dim, initializer_instance=None):
def embedding_layer(input, table_name, emb_dim, initializer_instance=None):
emb = fluid.embedding(
input=input,
size=[items_num, emb_dim],
......@@ -97,10 +98,10 @@ class Model(ModelBase):
name=table_name,
initializer=initializer_instance),
)
return emb
sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
items_emb = embedding_layer(self.items, "emb", hidden_size, sparse_initializer)
return emb
sparse_initializer = fluid.initializer.Uniform(low=-stdv, high=stdv)
items_emb = embedding_layer(self.items, "emb", hidden_size, sparse_initializer)
pre_state = items_emb
for i in range(step):
pre_state = layers.reshape(x=pre_state, shape=[bs, -1, hidden_size])
......@@ -113,7 +114,7 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, uniq_max, h]
low=-stdv, high=stdv))) # [batch_size, uniq_max, h]
state_out = layers.fc(
input=pre_state,
name="state_out",
......@@ -123,13 +124,13 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)),
bias_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, uniq_max, h]
state_adj_in = layers.matmul(self.adj_in, state_in) #[batch_size, uniq_max, h]
state_adj_out = layers.matmul(self.adj_out, state_out) #[batch_size, uniq_max, h]
low=-stdv, high=stdv))) # [batch_size, uniq_max, h]
state_adj_in = layers.matmul(self.adj_in, state_in) # [batch_size, uniq_max, h]
state_adj_out = layers.matmul(self.adj_out, state_out) # [batch_size, uniq_max, h]
gru_input = layers.concat([state_adj_in, state_adj_out], axis=2)
gru_input = layers.reshape(x=gru_input, shape=[-1, hidden_size * 2])
gru_fc = layers.fc(
input=gru_input,
......@@ -140,11 +141,11 @@ class Model(ModelBase):
input=gru_fc,
hidden=layers.reshape(x=pre_state, shape=[-1, hidden_size]),
size=3 * hidden_size)
final_state = layers.reshape(pre_state, shape=[bs, -1, hidden_size])
seq = layers.gather_nd(final_state, self.seq_index)
last = layers.gather_nd(final_state, self.last_index)
seq_fc = layers.fc(
input=seq,
name="seq_fc",
......@@ -154,7 +155,7 @@ class Model(ModelBase):
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, seq_max, h]
low=-stdv, high=stdv))) # [batch_size, seq_max, h]
last_fc = layers.fc(
input=last,
name="last_fc",
......@@ -164,22 +165,22 @@ class Model(ModelBase):
num_flatten_dims=1,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, h]
low=-stdv, high=stdv))) # [batch_size, h]
seq_fc_t = layers.transpose(
seq_fc, perm=[1, 0, 2]) #[seq_max, batch_size, h]
seq_fc, perm=[1, 0, 2]) # [seq_max, batch_size, h]
add = layers.elementwise_add(
seq_fc_t, last_fc) #[seq_max, batch_size, h]
seq_fc_t, last_fc) # [seq_max, batch_size, h]
b = layers.create_parameter(
shape=[hidden_size],
dtype='float32',
default_initializer=fluid.initializer.Constant(value=0.0)) #[h]
add = layers.elementwise_add(add, b) #[seq_max, batch_size, h]
add_sigmoid = layers.sigmoid(add) #[seq_max, batch_size, h]
default_initializer=fluid.initializer.Constant(value=0.0)) # [h]
add = layers.elementwise_add(add, b) # [seq_max, batch_size, h]
add_sigmoid = layers.sigmoid(add) # [seq_max, batch_size, h]
add_sigmoid = layers.transpose(
add_sigmoid, perm=[1, 0, 2]) #[batch_size, seq_max, h]
add_sigmoid, perm=[1, 0, 2]) # [batch_size, seq_max, h]
weight = layers.fc(
input=add_sigmoid,
name="weight_fc",
......@@ -189,13 +190,13 @@ class Model(ModelBase):
bias_attr=False,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, seq_max, 1]
low=-stdv, high=stdv))) # [batch_size, seq_max, 1]
weight *= self.mask
weight_mask = layers.elementwise_mul(seq, weight, axis=0) #[batch_size, seq_max, h]
global_attention = layers.reduce_sum(weight_mask, dim=1) #[batch_size, h]
weight_mask = layers.elementwise_mul(seq, weight, axis=0) # [batch_size, seq_max, h]
global_attention = layers.reduce_sum(weight_mask, dim=1) # [batch_size, h]
final_attention = layers.concat(
[global_attention, last], axis=1) #[batch_size, 2*h]
[global_attention, last], axis=1) # [batch_size, 2*h]
final_attention_fc = layers.fc(
input=final_attention,
name="final_attention_fc",
......@@ -203,14 +204,14 @@ class Model(ModelBase):
bias_attr=False,
act=None,
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv))) #[batch_size, h]
# all_vocab = layers.create_global_var(
# shape=[items_num - 1],
# value=0,
# dtype="int64",
# persistable=True,
# name="all_vocab")
low=-stdv, high=stdv))) # [batch_size, h]
# all_vocab = layers.create_global_var(
# shape=[items_num - 1],
# value=0,
# dtype="int64",
# persistable=True,
# name="all_vocab")
all_vocab = np.arange(1, items_num).reshape((-1)).astype('int32')
all_vocab = fluid.layers.cast(x=fluid.layers.assign(all_vocab), dtype='int64')
......@@ -220,13 +221,13 @@ class Model(ModelBase):
name="emb",
initializer=fluid.initializer.Uniform(
low=-stdv, high=stdv)),
size=[items_num, hidden_size]) #[all_vocab, h]
size=[items_num, hidden_size]) # [all_vocab, h]
logits = layers.matmul(
x=final_attention_fc, y=all_emb,
transpose_y=True) #[batch_size, all_vocab]
transpose_y=True) # [batch_size, all_vocab]
softmax = layers.softmax_with_cross_entropy(
logits=logits, label=self.label) #[batch_size, 1]
logits=logits, label=self.label) # [batch_size, 1]
self.loss = layers.reduce_mean(softmax) # [1]
self.acc = layers.accuracy(input=logits, label=self.label, k=20)
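accuracy(..., k=20) reports the top-20 hit rate (the Recall@20 / P@20 figure usually quoted for session-based recommendation). A minimal numpy sketch of the same metric, illustrative only:
import numpy as np
def hit_at_k(logits, label, k=20):
    # fraction of rows whose true label appears among the k highest-scoring items
    topk = np.argsort(-logits, axis=1)[:, :k]
    return float(np.mean([l in row for l, row in zip(label.ravel(), topk)]))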
......@@ -249,7 +250,7 @@ class Model(ModelBase):
decay_steps = envs.get_global_env("hyper_parameters.decay_steps", None, self._namespace)
decay_rate = envs.get_global_env("hyper_parameters.decay_rate", None, self._namespace)
l2 = envs.get_global_env("hyper_parameters.l2", None, self._namespace)
optimizer = fluid.optimizer.Adam(
optimizer = fluid.optimizer.Adam(
learning_rate=fluid.layers.exponential_decay(
learning_rate=learning_rate,
decay_steps=decay_steps * step_per_epoch,
......@@ -257,18 +258,18 @@ class Model(ModelBase):
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=l2))
return optimizer
return optimizer
def infer_input(self):
self._reader_namespace = "evaluate.reader"
res = self.input(self.evaluate_batch_size)
self._infer_data_var = res
self._infer_data_var = res
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
def infer_net(self):
self.infer_input()
self.net(self.items_num, self.hidden_size, self.step, self.evaluate_batch_size)
self.infer_input()
self.net(self.items_num, self.hidden_size, self.step, self.evaluate_batch_size)
self._infer_results['acc'] = self.acc
self._infer_results['loss'] = self.loss
self._infer_results['loss'] = self.loss
......@@ -11,10 +11,12 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import copy
import random
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
......@@ -22,17 +24,17 @@ from paddlerec.core.utils import envs
class TrainReader(Reader):
def init(self):
self.batch_size = envs.get_global_env("batch_size", None, "train.reader")
self.input = []
self.length = None
def base_read(self, files):
res = []
for f in files:
with open(f, "r") as fin:
with open(f, "r") as fin:
for line in fin:
line = line.strip().split('\t')
res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
line = line.strip().split('\t')
res.append(tuple([map(int, line[0].split(',')), int(line[1])]))
return res
def make_data(self, cur_batch, batch_size):
......@@ -120,10 +122,11 @@ class TrainReader(Reader):
else:
# Due to fixed batch_size, discard the remaining ins
return
#cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size)
# cur_batch = remain_data[i:]
# yield self.make_data(cur_batch, group_remain % batch_size)
return _reader
def generate_batch_from_trainfiles(self, files):
self.input = self.base_read(files)
self.length = len(self.input)
......@@ -132,4 +135,5 @@ class TrainReader(Reader):
def generate_sample(self, line):
def data_iter():
yield []
return data_iter
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
......@@ -87,10 +86,8 @@ class Model(ModelBase):
self._metrics["cost"] = avg_cost
self._metrics["acc"] = acc
def train_net(self):
self.all_vocab_network()
def infer_net(self):
self.all_vocab_network(is_infer=True)
......@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
......
......@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class TrainReader(Reader):
......
......@@ -12,15 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
import paddle.fluid.layers.tensor as tensor
import paddle.fluid.layers.io as io
import paddle.fluid.layers.control_flow as cf
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
class BowEncoder(object):
......@@ -54,6 +51,7 @@ class GrnnEncoder(object):
bias_attr=self.param_name + ".bias")
return fluid.layers.sequence_pool(input=gru_h, pool_type='max')
class PairwiseHingeLoss(object):
def __init__(self, margin=0.8):
self.margin = margin
......@@ -70,6 +68,7 @@ class PairwiseHingeLoss(object):
loss_part2)
return loss_part3
class Model(ModelBase):
def __init__(self, config):
ModelBase.__init__(self, config)
......@@ -80,7 +79,6 @@ class Model(ModelBase):
return correct
def train(self):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
hidden_size = envs.get_global_env("hyper_parameters.hidden_size", None, self._namespace)
......@@ -124,16 +122,14 @@ class Model(ModelBase):
hinge_loss = self.pairwise_hinge_loss.forward(cos_pos, cos_neg)
avg_cost = fluid.layers.mean(hinge_loss)
correct = self.get_correct(cos_neg, cos_pos)
self._cost = avg_cost
self._metrics["correct"] = correct
self._metrics["hinge_loss"] = hinge_loss
def train_net(self):
self.train()
def infer(self):
vocab_size = envs.get_global_env("hyper_parameters.vocab_size", None, self._namespace)
emb_dim = envs.get_global_env("hyper_parameters.emb_dim", None, self._namespace)
......@@ -146,7 +142,7 @@ class Model(ModelBase):
pos_label = fluid.data(name="pos_label", shape=[None, 1], dtype="int64")
self._infer_data_var = [user_data, all_item_data, pos_label]
self._infer_data_loader = fluid.io.DataLoader.from_generator(
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
feed_list=self._infer_data_var, capacity=64, use_double_buffer=False, iterable=False)
user_emb = fluid.embedding(
input=user_data, size=[vocab_size, emb_dim], param_attr="emb.item")
......@@ -173,6 +169,5 @@ class Model(ModelBase):
self._infer_results['recall20'] = acc
def infer_net(self):
self.infer()
......@@ -11,19 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import random
import numpy as np
class EvaluateReader(Reader):
def init(self):
self.vocab_size = envs.get_global_env("vocab_size", 10, "train.model.hyper_parameters")
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
......@@ -39,6 +39,6 @@ class EvaluateReader(Reader):
src = conv_ids[:boundary]
pos_tgt = [conv_ids[boundary]]
feature_name = ["user", "all_item", "p_item"]
yield zip(feature_name, [src] + [np.arange(self.vocab_size).astype("int64").tolist()]+ [pos_tgt])
yield zip(feature_name, [src] + [np.arange(self.vocab_size).astype("int64").tolist()] + [pos_tgt])
return reader
......@@ -11,12 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
import random
from paddlerec.core.reader import Reader
class TrainReader(Reader):
def init(self):
......@@ -25,7 +26,6 @@ class TrainReader(Reader):
def sample_neg_from_seq(self, seq):
return seq[random.randint(0, len(seq) - 1)]
def generate_sample(self, line):
"""
Read the data line by line and process it as a dictionary
......
......@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import numpy as np
import paddle.fluid as fluid
......
#! /bin/bash
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# download train_data
mkdir raw_data
wget --no-check-certificate https://paddlerec.bj.bcebos.com/word2vec/1-billion-word-language-modeling-benchmark-r13output.tar
......
......@@ -13,13 +13,15 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import io
import math
import os
import random
import re
import six
import argparse
import io
import math
prog = re.compile("[^a-z ]", flags=0)
......@@ -73,7 +75,7 @@ def parse_args():
def text_strip(text):
#English Preprocess Rule
# English Preprocess Rule
return prog.sub("", text.lower())
......@@ -115,7 +117,7 @@ def filter_corpus(args):
word_all_count = 0
id_counts = []
word_id = 0
#read dict
# read dict
with io.open(args.dict_path, 'r', encoding='utf-8') as f:
for line in f:
word, count = line.split()[0], int(line.split()[1])
......@@ -125,13 +127,13 @@ def filter_corpus(args):
id_counts.append(count)
word_all_count += count
#write word2id file
# write word2id file
print("write word2id file to : " + args.dict_path + "_word_to_id_")
with io.open(
args.dict_path + "_word_to_id_", 'w+', encoding='utf-8') as fid:
for k, v in word_to_id_.items():
fid.write(k + " " + str(v) + '\n')
#filter corpus and convert id
# filter corpus and convert id
if not os.path.exists(args.output_corpus_dir):
os.makedirs(args.output_corpus_dir)
for file in os.listdir(args.input_corpus_dir):
......@@ -152,9 +154,9 @@ def filter_corpus(args):
count_w = id_counts[idx]
corpus_size = word_all_count
keep_prob = (
math.sqrt(count_w /
(args.downsample * corpus_size)) + 1
) * (args.downsample * corpus_size) / count_w
math.sqrt(count_w /
(args.downsample * corpus_size)) + 1
) * (args.downsample * corpus_size) / count_w
r_value = random.random()
if r_value > keep_prob:
continue
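This is the standard word2vec sub-sampling rule: the more frequent a word, the smaller its keep probability, so very common words are randomly dropped. A standalone sketch of the same rule, assuming the count_w / corpus_size / args.downsample values used above:
import math, random
def keep_token(count_w, corpus_size, downsample):
    ratio = downsample * corpus_size
    keep_prob = (math.sqrt(count_w / ratio) + 1) * ratio / count_w
    return random.random() <= keep_prob   # frequent words get a small keep_prob and are often skipped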
......@@ -200,7 +202,7 @@ def build_dict(args):
for item in item_to_remove:
unk_sum += word_count[item]
del word_count[item]
#sort by count
# sort by count
word_count[native_to_unicode('<UNK>')] = unk_sum
word_count = sorted(
word_count.items(), key=lambda word_count: -word_count[1])
......@@ -222,17 +224,18 @@ def data_split(args):
for file_ in files:
with open(os.path.join(raw_data_dir, file_), 'r') as f:
contents.extend(f.readlines())
num = int(args.file_nums)
lines_per_file = len(contents) / num
print("contents: ", str(len(contents)))
print("lines_per_file: ", str(lines_per_file))
for i in range(1, num+1):
for i in range(1, num + 1):
with open(os.path.join(new_data_dir, "part_" + str(i)), 'w') as fout:
data = contents[(i-1)*lines_per_file:min(i*lines_per_file,len(contents))]
data = contents[(i - 1) * lines_per_file:min(i * lines_per_file, len(contents))]
for line in data:
fout.write(line)
fout.write(line)
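One caveat: under Python 3, len(contents) / num is a float and cannot be used as a slice bound. A hedged sketch of the same split with integer arithmetic, assuming contents and args.file_nums as above:
num = int(args.file_nums)
lines_per_file = (len(contents) + num - 1) // num   # ceiling division so no trailing lines are lost
for i in range(1, num + 1):
    chunk = contents[(i - 1) * lines_per_file:min(i * lines_per_file, len(contents))]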
if __name__ == "__main__":
args = parse_args()
......
......@@ -11,16 +11,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import six
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
class EvaluateReader(Reader):
def init(self):
dict_path = envs.get_global_env("word_id_dict_path", None, "evaluate.reader")
dict_path = envs.get_global_env("word_id_dict_path", None, "evaluate.reader")
self.word_to_id = dict()
self.id_to_word = dict()
with io.open(dict_path, 'r', encoding='utf-8') as f:
......@@ -46,19 +48,16 @@ class EvaluateReader(Reader):
if isinstance(s, str):
return True
return False
def _to_unicode(self, s, ignore_errors=False):
if self._is_unicode(s):
return s
error_mode = "ignore" if ignore_errors else "strict"
return s.decode("utf-8", errors=error_mode)
def strip_lines(self, line, vocab):
return self._replace_oov(vocab, self.native_to_unicode(line))
def _replace_oov(self, original_vocab, line):
"""Replace out-of-vocab words with "<UNK>".
This maintains compatibility with published results.
......@@ -76,5 +75,7 @@ class EvaluateReader(Reader):
def reader():
features = self.strip_lines(line.lower(), self.word_to_id)
features = features.split()
yield [('analogy_a', [self.word_to_id[features[0]]]), ('analogy_b', [self.word_to_id[features[1]]]), ('analogy_c', [self.word_to_id[features[2]]]), ('analogy_d', [self.word_to_id[features[3]]])]
yield [('analogy_a', [self.word_to_id[features[0]]]), ('analogy_b', [self.word_to_id[features[1]]]),
('analogy_c', [self.word_to_id[features[2]]]), ('analogy_d', [self.word_to_id[features[3]]])]
return reader
......@@ -11,8 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import io
import numpy as np
from paddlerec.core.reader import Reader
from paddlerec.core.utils import envs
......@@ -37,7 +40,7 @@ class NumpyRandomInt(object):
class TrainReader(Reader):
def init(self):
dict_path = envs.get_global_env("word_count_dict_path", None, "train.reader")
dict_path = envs.get_global_env("word_count_dict_path", None, "train.reader")
self.window_size = envs.get_global_env("hyper_parameters.window_size", None, "train.model")
self.neg_num = envs.get_global_env("hyper_parameters.neg_num", None, "train.model")
self.with_shuffle_batch = envs.get_global_env("hyper_parameters.with_shuffle_batch", None, "train.model")
......@@ -72,7 +75,7 @@ class TrainReader(Reader):
start_point = 0
end_point = idx + target_window
targets = words[start_point:idx] + words[idx + 1:end_point + 1]
return targets
return targets
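The slicing above collects the skip-gram context: every word inside the sampled window around position idx, excluding the centre word itself. A tiny worked example (target_window is drawn per sample in the surrounding code):
words = [10, 11, 12, 13, 14]
idx, target_window = 2, 1
start_point = max(idx - target_window, 0)   # the code above falls back to 0 when the window runs past the left edge
end_point = idx + target_window
targets = words[start_point:idx] + words[idx + 1:end_point + 1]   # -> [11, 13]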
def generate_sample(self, line):
def reader():
......@@ -84,7 +87,7 @@ class TrainReader(Reader):
output = [('input_word', [int(target_id)]), ('true_label', [int(context_id)])]
if not self.with_shuffle_batch:
neg_array = self.cs.searchsorted(np.random.sample(self.neg_num))
output += [('neg_label', [int(str(i)) for i in neg_array ])]
output += [('neg_label', [int(str(i)) for i in neg_array])]
yield output
return reader
return reader
......@@ -14,8 +14,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import paddle.fluid as fluid
import math
from paddlerec.core.utils import envs
from paddlerec.core.model import Model as ModelBase
......@@ -134,7 +134,7 @@ class Model(ModelBase):
sample_nodes_emb = [
fluid.layers.reshape(sample_nodes_emb[i],
[-1, self.neg_sampling_list[i] +
self.output_positive, self.node_emb_size]
self.output_positive, self.node_emb_size]
) for i in range(self.max_layers)
]
......@@ -229,7 +229,7 @@ class Model(ModelBase):
act=self.act,
param_attr=fluid.ParamAttr(
name="trans.layer_fc.weight." + str(i)),
bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias."+str(i)),
bias_attr=fluid.ParamAttr(name="trans.layer_fc.bias." + str(i)),
) for i in range(self.max_layers)
]
......@@ -268,8 +268,8 @@ class Model(ModelBase):
num_flatten_dims=2,
act=self.act,
param_attr=fluid.ParamAttr(
name="cls.concat_fc.weight."+str(i)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias."+str(i))
name="cls.concat_fc.weight." + str(i)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(i))
) for i in range(self.max_layers)
]
......@@ -348,7 +348,7 @@ class Model(ModelBase):
current_layer_node_num = self.first_layer_node.shape[1]
else:
current_layer_node_num = current_layer_node.shape[1] * \
current_layer_node.shape[2]
current_layer_node.shape[2]
current_layer_node = fluid.layers.reshape(
current_layer_node, [-1, current_layer_node_num])
......@@ -458,7 +458,7 @@ class Model(ModelBase):
param_attr=fluid.ParamAttr(
name="trans.layer_fc.weight." + str(layer_idx)),
bias_attr=fluid.ParamAttr(
name="trans.layer_fc.bias."+str(layer_idx)),
name="trans.layer_fc.bias." + str(layer_idx)),
)
return input_layer_fc_out
......@@ -479,6 +479,6 @@ class Model(ModelBase):
num_flatten_dims=2,
act=self.act,
param_attr=fluid.ParamAttr(
name="cls.concat_fc.weight."+str(layer_idx)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias."+str(layer_idx)))
name="cls.concat_fc.weight." + str(layer_idx)),
bias_attr=fluid.ParamAttr(name="cls.concat_fc.bias." + str(layer_idx)))
return hidden_states_fc
......@@ -28,6 +28,7 @@ class EvaluateReader(Reader):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
......
......@@ -28,6 +28,7 @@ class TrainReader(Reader):
"""
Read the data line by line and process it as a dictionary
"""
def reader():
"""
This function needs to be implemented by the user, based on data format
......
......@@ -12,11 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import subprocess
import tempfile
import argparse
import tempfile
import yaml
from paddlerec.core.factory import TrainerFactory
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
setup for paddle-rec.
"""
import os
from setuptools import setup, find_packages
import tempfile
import shutil
import tempfile
requires = [
"paddlepaddle == 1.7.2",
......@@ -19,7 +36,7 @@ about["__author__"] = "paddle-dev"
about["__author_email__"] = "paddle-dev@baidu.com"
about["__url__"] = "https://github.com/PaddlePaddle/PaddleRec"
readme = "..."
readme = ""
def run_cmd(command):
......
......@@ -12,15 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import functools
import os
import time
import platform
import sys
import shutil
import time
import requests
import sys
import tarfile
import zipfile
import platform
import functools
lasttime = time.time()
FLUSH_INTERVAL = 0.1
......