import tensorflow as tf
import numpy as np
from time import time
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
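
# note: this model is written against the TensorFlow 1.x API
# (placeholders, tf.Session, tf.contrib); it will not run under TF 2.x as-is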


class DCN(BaseEstimator, TransformerMixin):
    def __init__(self, cate_feature_size, field_size, numeric_feature_size, embedding_size=8,
                 deep_layers=[32, 32], dropout_deep=[0.5, 0.5, 0.5],
                 deep_layers_activation=tf.nn.relu, epoch=10, batch_size=256,
                 learning_rate=0.01, optimizer_type='adam',
                 verbose=False, random_seed=2018, loss_type='logloss',
                 eval_metric=roc_auc_score, l2_reg=0.0, cross_layer_num=3):
        assert loss_type in ["logloss", "rmse"], \
            "'logloss' for classification or 'rmse' for regression"

        self.cate_feature_size = cate_feature_size
        self.numeric_feature_size = numeric_feature_size
        self.field_size = field_size
        self.embedding_size = embedding_size
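        # width of x0: flattened categorical embeddings plus raw numeric features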
        self.total_size = self.field_size * self.embedding_size + self.numeric_feature_size
        self.deep_layers = deep_layers
        self.cross_layer_num = cross_layer_num
        self.dropout_deep = dropout_deep
        self.deep_layers_activation = deep_layers_activation
        self.l2_reg = l2_reg

        self.epoch = epoch
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.optimizer_type = optimizer_type

        self.verbose = verbose
        self.random_seed = random_seed
        self.loss_type = loss_type
        self.eval_metric = eval_metric
        self.train_result,self.valid_result = [],[]

        self._init_graph()
        
    def _init_graph(self):
        self.graph = tf.Graph()
        with self.graph.as_default():
            tf.set_random_seed(self.random_seed)
            self.feat_index = tf.placeholder(tf.int32, shape=[None,None], name='feat_index')
            self.feat_value = tf.placeholder(tf.float32, shape=[None,None], name='feat_value')
            self.numeric_value = tf.placeholder(tf.float32, shape=[None,None],name='num_value')
            self.label = tf.placeholder(tf.float32,shape=[None,1],name='label')
            self.dropout_keep_deep = tf.placeholder(tf.float32,shape=[None],name='dropout_keep_deep')
            self.train_phase = tf.placeholder(tf.bool,name='train_phase')
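            # note: train_phase is fed by fit()/predict() but no op in this graph consumes it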
                
            self.weights = self._initialize_weights()
                
            # model
            self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'],self.feat_index) # N * F * K
            feat_value = tf.reshape(self.feat_value, shape=[-1,self.field_size,1])
            self.embeddings = tf.multiply(self.embeddings,feat_value)
                
            self.x0 = tf.concat([self.numeric_value,tf.reshape(self.embeddings,shape=[-1,self.field_size * self.embedding_size])], axis=1)
                
            # deep network
            self.y_deep = tf.nn.dropout(self.x0,self.dropout_keep_deep[0])
                
            for i in range(len(self.deep_layers)):
                self.y_deep = tf.add(tf.matmul(self.y_deep,self.weights['deep_layer_%d' % i]), self.weights['deep_bias_%d' % i])
                self.y_deep = self.deep_layers_activation(self.y_deep)
                self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[i+1])
                    
            # cross network: x_{l+1} = x_0 * (x_l^T w_l) + b_l + x_l
            self._x0 = tf.reshape(self.x0, (-1, self.total_size, 1))
            x_l = self._x0
            for l in range(self.cross_layer_num):
                x_l = tf.tensordot(tf.matmul(self._x0, x_l, transpose_b=True),
                                   self.weights['cross_layer_%d' % l], 1) \
                      + self.weights['cross_bias_%d' % l] + x_l
            self.cross_network_out = tf.reshape(x_l, (-1, self.total_size))
                    
                    
            # concat layer
            concat_input = tf.concat([self.cross_network_out, self.y_deep], axis=1)
                
            self.out = tf.add(tf.matmul(concat_input,self.weights['concat_projection']),self.weights['concat_bias'])
                
            # loss
            if self.loss_type == 'logloss':
                self.out = tf.nn.sigmoid(self.out)
                self.loss = tf.losses.log_loss(self.label,self.out)
            elif self.loss_type == 'rmse':
                self.loss = tf.sqrt(tf.losses.mean_squared_error(self.label,self.out))
                    
                    
            # l2_reg
            if self.l2_reg > 0:
                self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['concat_projection'])
                for i in range(len(self.deep_layers)):
                    self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['deep_layer_%d' % i])
                for i in range(self.cross_layer_num):
                    self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['cross_layer_%d' % i])
                        
                        
            # optimization
            if self.optimizer_type == 'adam':
                self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,beta1=0.9,beta2=0.999,epsilon=1e-8).minimize(self.loss)
                    
            elif self.optimizer_type == 'adagrad':
                self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
                    
            elif self.optimizer_type == 'gd':
                self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
                    
            elif self.optimizer_type == 'momentum':
                self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)
                    
            # init 
            self.saver = tf.train.Saver()
            init = tf.global_variables_initializer()
            self.sess = tf.Session()
            self.sess.run(init)
                
            # number of params
            total_parameters = 0
            for v in self.weights.values():
                shape = v.get_shape()
                value_params = 1
                for dim in shape:
                    value_params *= dim.value
                total_parameters += value_params
                    
            if self.verbose > 0:
                print('#params: %d' % total_parameters)
                    
    def _initialize_weights(self):
        weights = dict()
        
        #embedding
        weights['feature_embeddings'] = tf.Variable(tf.random_normal([self.cate_feature_size,self.embedding_size],0.0,0.01),name='feature_embeddings')
        weights['feature_bias'] = tf.Variable(tf.random_normal([self.cate_feature_size,1],0.0,1.0),name='feature_bias')
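        # note: feature_bias is initialized here but never used in the forward pass above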
        
        # deep network
        num_layer = len(self.deep_layers)
        glorot = np.sqrt(2.0 / (self.total_size + self.deep_layers[0]))
        
        weights['deep_layer_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,self.deep_layers[0])),dtype=np.float32)
        weights['deep_bias_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[0])),dtype=np.float32) 
        
        
        for i in range(1, num_layer):
            glorot = np.sqrt(2.0 / (self.deep_layers[i-1] + self.deep_layers[i]))
            # size = layers[i-1] * layers[i]
            weights['deep_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.deep_layers[i-1],self.deep_layers[i])),dtype=np.float32)
            #size = 1 * layers[i]
            weights['deep_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[i])),dtype=np.float32)
            
        # cross network: one (total_size x 1) weight and bias vector per layer
        glorot = np.sqrt(2.0 / (self.total_size + 1))
        for i in range(self.cross_layer_num):
            weights['cross_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)), dtype=np.float32)
            weights['cross_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)), dtype=np.float32)
            
        
        # Concat layers
        input_size = self.total_size + self.deep_layers[-1]
        
        glorot = np.sqrt(2.0 / (input_size + 1))
        weights['concat_projection'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,1)),dtype=np.float32)
        weights['concat_bias'] = tf.Variable(tf.constant(0.01), dtype=np.float32)
        
        return weights
            
        
    def get_batch(self, Xi, Xv, Xv2, y, batch_size, index):
        start = index * batch_size
        end = (index + 1) * batch_size
        end = end if end < len(y) else len(y)
        return Xi[start:end], Xv[start:end], Xv2[start:end], [[y_] for y_ in y[start:end]]
   

    # shuffle four lists simultaneously, keeping their rows aligned
    def shuffle_in_unison_scary(self, a, b, c, d):
        rng_state = np.random.get_state()
        np.random.shuffle(a)
        np.random.set_state(rng_state)
        np.random.shuffle(b)
        np.random.set_state(rng_state)
        np.random.shuffle(c)
        np.random.set_state(rng_state)
        np.random.shuffle(d)
        
        
    def predict(self, Xi, Xv, Xv2, y):
        # evaluate the loss on the given data (inference mode: dropout off)
        feed_dict = {self.feat_index: Xi,
                     self.feat_value: Xv,
                     self.numeric_value: Xv2,
                     self.label: y,
                     self.dropout_keep_deep: [1.0] * len(self.dropout_deep),
                     self.train_phase: False}

        loss = self.sess.run(self.loss, feed_dict=feed_dict)

        return loss
    
    def fit_on_batch(self, Xi, Xv, Xv2, y):
        feed_dict = {self.feat_index: Xi,
                     self.feat_value: Xv,
                     self.numeric_value: Xv2,
                     self.label: y,
                     self.dropout_keep_deep: self.dropout_deep,  # apply dropout during training
                     self.train_phase: True}

        loss, opt = self.sess.run([self.loss, self.optimizer], feed_dict=feed_dict)

        return loss
    
    def fit(self,cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train,
           cate_Xi_valid=None,cate_Xv_valid=None,numeric_Xv_valid=None,y_valid=None,
           early_stopping=False,refit=False):
        """
        :Xi_train: feature index of feature field of sample in the training set
        :Xv_train: feature value of feature field of sample in the training set; can be either binary or float
        :y_train: label of each sample in the training set
        :Xi_valid: feature indices of each sample in the validation set
        :Xv_valid: feature values of each sample in the validation set
        :y_valid: label of each sample in the validation set
        :early_stopping: early stopping or not
        :refit: refit the model on the train+valid dataset or not
        """
        
        if self.verbose:
            print('train sizes: %d / %d / %d / %d' % (len(cate_Xi_train), len(cate_Xv_train),
                                                      len(numeric_Xv_train), len(y_train)))
        
        has_valid = cate_Xv_valid is not None
        
        for epoch in range(self.epoch):
            t1 = time()
            self.shuffle_in_unison_scary(cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train)
            total_batch = int(len(y_train) / self.batch_size)
            for i in range(total_batch):
                cate_Xi_batch, cate_Xv_batch, numeric_Xv_batch, y_batch = self.get_batch(
                    cate_Xi_train, cate_Xv_train, numeric_Xv_train, y_train, self.batch_size, i)
                
                self.fit_on_batch(cate_Xi_batch,cate_Xv_batch,numeric_Xv_batch,y_batch)
                
                
            if has_valid:
                y_valid = np.array(y_valid).reshape((-1, 1))
                loss = self.predict(cate_Xi_valid, cate_Xv_valid, numeric_Xv_valid, y_valid)
                print('epoch %d: valid loss = %s [%.1f s]' % (epoch, loss, time() - t1))
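

if __name__ == '__main__':
    # Smoke-test sketch: all sizes and hyper-parameters below are illustrative
    # assumptions on synthetic random data, not a real dataset; it simply
    # exercises the same fit()/predict() path a real run would.
    n_samples, field_size = 1024, 10
    cate_feature_size, numeric_feature_size = 100, 5

    cate_Xi = np.random.randint(0, cate_feature_size, size=(n_samples, field_size)).tolist()
    cate_Xv = np.ones((n_samples, field_size)).tolist()   # 1.0 for one-hot categorical fields
    numeric_Xv = np.random.rand(n_samples, numeric_feature_size).tolist()
    y = np.random.randint(0, 2, size=n_samples).tolist()

    model = DCN(cate_feature_size=cate_feature_size,
                field_size=field_size,
                numeric_feature_size=numeric_feature_size,
                embedding_size=8, epoch=2, batch_size=128, verbose=True)
    model.fit(cate_Xi, cate_Xv, numeric_Xv, y,
              cate_Xi_valid=cate_Xi, cate_Xv_valid=cate_Xv,
              numeric_Xv_valid=numeric_Xv, y_valid=y)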