Unverified commit 5f92ed93 authored by 片刻小哥哥, committed by GitHub

Merge pull request #6 from O2Dyokii/rs

DCN-demo
Note: https://kaiyuanyokii2n.com/DCN.html
TRAIN_FILE = "data/train.csv"
TEST_FILE = "data/test.csv"
SUB_DIR = "output"
NUM_SPLITS = 3
RANDOM_SEED = 2018
# types of columns of the dataset dataframe
CATEGORICAL_COLS = [
'ps_ind_02_cat', 'ps_ind_04_cat', 'ps_ind_05_cat',
'ps_car_01_cat', 'ps_car_02_cat', 'ps_car_03_cat',
'ps_car_04_cat', 'ps_car_05_cat', 'ps_car_06_cat',
'ps_car_07_cat', 'ps_car_08_cat', 'ps_car_09_cat',
'ps_car_10_cat', 'ps_car_11_cat',
]
NUMERIC_COLS = [
# # binary
# "ps_ind_06_bin", "ps_ind_07_bin", "ps_ind_08_bin",
# "ps_ind_09_bin", "ps_ind_10_bin", "ps_ind_11_bin",
# "ps_ind_12_bin", "ps_ind_13_bin", "ps_ind_16_bin",
# "ps_ind_17_bin", "ps_ind_18_bin",
# "ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
# "ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin",
# numeric
"ps_reg_01", "ps_reg_02", "ps_reg_03",
"ps_car_12", "ps_car_13", "ps_car_14", "ps_car_15",
# feature engineering
"missing_feat", "ps_car_13_x_ps_reg_03",
]
IGNORE_COLS = [
"id", "target",
"ps_calc_01", "ps_calc_02", "ps_calc_03", "ps_calc_04",
"ps_calc_05", "ps_calc_06", "ps_calc_07", "ps_calc_08",
"ps_calc_09", "ps_calc_10", "ps_calc_11", "ps_calc_12",
"ps_calc_13", "ps_calc_14",
"ps_calc_15_bin", "ps_calc_16_bin", "ps_calc_17_bin",
"ps_calc_18_bin", "ps_calc_19_bin", "ps_calc_20_bin"
]
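# How these lists are consumed downstream (a sketch of the routing, mirroring
# load_data.py and the notebook; nothing here is executed):
#   col in IGNORE_COLS      -> dropped before training
#   col in NUMERIC_COLS     -> fed to the model as a raw float value
#   any remaining column    -> treated as categorical, indexed through the
#                              feature dict and looked up in the embedding table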
import pandas as pd
import numpy as np
class FeatureDict(object):
def __init__(self, trainfile=None, testfile=None,numeric_cols=[],ignore_cols=[],cate_cols=[]):
self.trainfile = trainfile
self.testfile = testfile
self.cate_cols = cate_cols
self.numeric_cols = numeric_cols
self.ignore_cols = ignore_cols
self.gen_feat_dict()
'''
Generate the categorical feature dict, assigning each distinct value a global index.
e.g. df[col1] = [3,4,1,0,2]; df[col2] = [-1,2,7]
generated feat_dict = {'col1':{3:0,4:1,1:2,0:3,2:4},'col2':{-1:5,2:6,7:7}}
(see the usage sketch at the end of this file)
'''
def gen_feat_dict(self):
df = pd.concat([self.trainfile,self.testfile])
self.feat_dict = {}
tc = 0
for col in df.columns:
if col in self.ignore_cols or col in self.numeric_cols:
continue
else:
us = df[col].unique()
self.feat_dict[col] = dict(zip(us, range(tc, len(us) + tc)))
tc += len(us)
self.feat_dim = tc
class DataPaser(object):
def __init__(self,feat_dict):
self.feat_dict = feat_dict
def parse(self,df=None,has_label=False):
dfi = df.copy() # feature index
if has_label:
y = dfi['target'].values.tolist()
dfi.drop(['id','target'], axis=1, inplace=True)
else:
ids = dfi['id'].values.tolist()
dfi.drop(['id'],axis=1,inplace=True)
numeric_values = dfi[self.feat_dict.numeric_cols].values.tolist()
dfi.drop(self.feat_dict.numeric_cols, axis=1,inplace=True)
dfv = dfi.copy() # dfv holds the feature values (binary or float)
for col in dfi.columns:
if col in self.feat_dict.ignore_cols:
dfi.drop(col, axis=1,inplace=True)
dfv.drop(col, axis=1,inplace=True)
continue
# categorical feature
else:
dfi[col] = dfi[col].map(self.feat_dict.feat_dict[col])
dfv[col] = 1.
cate_idx = dfi.values.tolist()
cate_values = dfv.values.tolist()
if has_label:
return cate_idx, cate_values, numeric_values, y
else:
return cate_idx, cate_values, numeric_values, ids
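# Minimal usage sketch on hypothetical toy data (not part of the original demo);
# guarded so importing this module stays side-effect free.
if __name__ == '__main__':
    train = pd.DataFrame({'id': [1, 2, 3], 'target': [0, 1, 0],
                          'cat_a': [3, 4, 1], 'num_a': [0.1, 0.2, 0.3]})
    test = pd.DataFrame({'id': [4, 5], 'cat_a': [1, 0], 'num_a': [0.4, 0.5]})
    fd = FeatureDict(train, test, numeric_cols=['num_a'],
                     ignore_cols=['id', 'target'], cate_cols=['cat_a'])
    print(fd.feat_dict)    # {'cat_a': {3: 0, 4: 1, 1: 2, 0: 3}}
    parser = DataPaser(feat_dict=fd)
    cate_idx, cate_values, numeric_values, y = parser.parse(df=train, has_label=True)
    print(cate_idx)        # per-row indices into the shared feature dict
    print(numeric_values)  # raw numeric values, passed through untouched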
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mllab/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"import pandas as pd\n",
"import numpy as np\n",
"import config\n",
"from sklearn.model_selection import StratifiedKFold\n",
"from load_data import FeatureDict, DataPaser\n",
"from model import DCN"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load data"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"def load_data():\n",
" dfTrain = pd.read_csv('data/train.csv')\n",
" dfTest = pd.read_csv('data/test.csv')\n",
" \n",
" def preprocess(df):\n",
" cols = [c for c in df.columns if c not in ['id','target']]\n",
" df['missing_feat'] = np.sum((df[cols] == -1).values, axis=1)\n",
" df['ps_car_13_x_ps_reg_03'] = df['ps_car_13'] * df['ps_reg_03']\n",
" return df\n",
" \n",
" dfTrain = preprocess(dfTrain)\n",
" dfTest = preprocess(dfTest)\n",
" \n",
" cols = [c for c in dfTrain.columns if c not in ['id','target']]\n",
" cols = [c for c in cols if (not c in config.IGNORE_COLS)]\n",
" \n",
" X_train = dfTrain[cols].values\n",
" y_train = dfTrain['target'].values\n",
" X_test = dfTest[cols].values\n",
" ids_test = dfTest['id'].values\n",
" \n",
" return dfTrain, dfTest, X_train, y_train,X_test,ids_test"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Run model"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def run_dcn(dfTrain, dfTest, folds, params):\n",
" fd = FeatureDict(dfTrain, dfTest, numeric_cols=config.NUMERIC_COLS, \n",
" ignore_cols=config.IGNORE_COLS, cate_cols=config.CATEGORICAL_COLS)\n",
" # print(fd.feat_dim)\n",
" # print(fd.feat_dict)\n",
" \n",
" data_parser = DataPaser(feat_dict=fd)\n",
" cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train = data_parser.parse(df=dfTrain,has_label=True)\n",
" cate_Xi_test,cate_Xv_test,numeric_Xv_test,ids_test = data_parser.parse(df=dfTest)\n",
" \n",
" params['cate_feature_size'] = fd.feat_dim\n",
" params['field_size'] = len(cate_Xi_train[0])\n",
" params['numeric_feature_size'] = len(config.NUMERIC_COLS)\n",
" \n",
" _get = lambda x, l: [x[i] for i in l]\n",
" for i,(trn_idx,val_idx) in enumerate(folds):\n",
" cate_Xi_train_, cate_Xv_train_,numeric_Xv_train_,y_train_ = _get(cate_Xi_train, trn_idx),_get(cate_Xv_train,trn_idx),_get(numeric_Xv_train,trn_idx),_get(y_train,trn_idx)\n",
" cate_Xi_valid_, cate_Xv_valid_,numeric_Xv_valid_,y_valid_ = _get(cate_Xi_train,val_idx),_get(cate_Xi_train,val_idx),_get(numeric_Xv_train,val_idx),_get(y_train,val_idx)\n",
" \n",
" dcn = DCN(**params)\n",
" dcn.fit(cate_Xi_train_,cate_Xv_train_,numeric_Xv_train_,y_train_,cate_Xi_valid_,cate_Xv_valid_,numeric_Xv_valid_,y_valid_)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Main"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"dfTrain,dfTest,X_train,y_train,X_test,ids_test = load_data()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"folds = list(StratifiedKFold(n_splits=config.NUM_SPLITS,shuffle=True, random_state=config.RANDOM_SEED).split(X_train,y_train))"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"params = {'embedding_size': 4,\n",
" 'deep_layers': [8,8],\n",
" 'dropout_deep': [0.5,0.5,0.5],\n",
" 'deep_layers_activation': tf.nn.relu,\n",
" 'epoch': 30,\n",
" 'batch_size': 128,\n",
" 'learning_rate': 0.001,\n",
" 'optimizer_type': 'adam',\n",
" 'verbose': True,\n",
" 'random_seed': config.RANDOM_SEED,\n",
" 'cross_layer_num': 3}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/mllab/jupyter/Workspace/Kyuan/RS/DCN/load_data.py:20: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
"of pandas will change to not sort by default.\n",
"\n",
"To accept the future behavior, pass 'sort=False'.\n",
"\n",
"To retain the current behavior and silence the warning, pass 'sort=True'.\n",
"\n",
" df = pd.concat([self.trainfile,self.testfile])\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Parames: 3259\n",
"6666\n",
"6666\n",
"6666\n",
"6666\n",
"epoch: 0 loss: [12.024529]\n",
"epoch: 1 loss: [7.773973]\n",
"epoch: 2 loss: [2.8597765]\n",
"epoch: 3 loss: [1.4841796]\n",
"epoch: 4 loss: [1.1699396]\n",
"epoch: 5 loss: [1.6370908]\n",
"epoch: 6 loss: [1.8515997]\n",
"epoch: 7 loss: [2.2488427]\n",
"epoch: 8 loss: [2.4133043]\n",
"epoch: 9 loss: [3.5436616]\n",
"epoch: 10 loss: [3.5054557]\n",
"epoch: 11 loss: [2.6502101]\n",
"epoch: 12 loss: [2.3495483]\n",
"epoch: 13 loss: [2.6154437]\n",
"epoch: 14 loss: [1.1022573]\n",
"epoch: 15 loss: [2.9973662]\n",
"epoch: 16 loss: [3.8579004]\n",
"epoch: 17 loss: [6.178442]\n",
"epoch: 18 loss: [8.025207]\n",
"epoch: 19 loss: [7.136653]\n",
"epoch: 20 loss: [7.3870573]\n",
"epoch: 21 loss: [8.595674]\n",
"epoch: 22 loss: [7.3435473]\n",
"epoch: 23 loss: [6.7827497]\n",
"epoch: 24 loss: [4.636249]\n",
"epoch: 25 loss: [3.5871704]\n",
"epoch: 26 loss: [2.8329947]\n",
"epoch: 27 loss: [4.4573736]\n",
"epoch: 28 loss: [4.7281036]\n",
"epoch: 29 loss: [4.7764482]\n",
"Parames: 3259\n",
"6667\n",
"6667\n",
"6667\n",
"6667\n",
"epoch: 0 loss: [0.6093248]\n",
"epoch: 1 loss: [1.5039687]\n",
"epoch: 2 loss: [6.712246]\n",
"epoch: 3 loss: [7.0024014]\n",
"epoch: 4 loss: [6.4801226]\n",
"epoch: 5 loss: [3.2884202]\n",
"epoch: 6 loss: [3.6269343]\n",
"epoch: 7 loss: [1.3347118]\n",
"epoch: 8 loss: [1.0590647]\n",
"epoch: 9 loss: [1.1074238]\n",
"epoch: 10 loss: [1.8569903]\n",
"epoch: 11 loss: [1.6635537]\n",
"epoch: 12 loss: [2.031083]\n",
"epoch: 13 loss: [2.0649345]\n",
"epoch: 14 loss: [3.854222]\n",
"epoch: 15 loss: [2.8241727]\n",
"epoch: 16 loss: [4.5554295]\n",
"epoch: 17 loss: [3.8784018]\n",
"epoch: 18 loss: [3.9847918]\n",
"epoch: 19 loss: [6.2238193]\n",
"epoch: 20 loss: [5.2663083]\n",
"epoch: 21 loss: [7.23936]\n",
"epoch: 22 loss: [5.730556]\n",
"epoch: 23 loss: [6.973385]\n",
"epoch: 24 loss: [6.073906]\n",
"epoch: 25 loss: [6.378568]\n",
"epoch: 26 loss: [5.058364]\n",
"epoch: 27 loss: [3.7043087]\n",
"epoch: 28 loss: [5.9491067]\n",
"epoch: 29 loss: [5.677361]\n",
"Parames: 3259\n",
"6667\n",
"6667\n",
"6667\n",
"6667\n",
"epoch: 0 loss: [0.6093248]\n",
"epoch: 1 loss: [0.6189967]\n",
"epoch: 2 loss: [0.6625199]\n",
"epoch: 3 loss: [0.9236592]\n",
"epoch: 4 loss: [0.8559564]\n",
"epoch: 5 loss: [0.88980776]\n",
"epoch: 6 loss: [1.2686524]\n",
"epoch: 7 loss: [1.5201157]\n",
"epoch: 8 loss: [1.6151947]\n",
"epoch: 9 loss: [2.6549156]\n",
"epoch: 10 loss: [4.126936]\n",
"epoch: 11 loss: [3.530216]\n",
"epoch: 12 loss: [3.5785751]\n",
"epoch: 13 loss: [6.5671687]\n",
"epoch: 14 loss: [3.3609593]\n",
"epoch: 15 loss: [4.690835]\n",
"epoch: 16 loss: [5.375682]\n",
"epoch: 17 loss: [4.978845]\n",
"epoch: 18 loss: [8.299833]\n",
"epoch: 19 loss: [7.519844]\n",
"epoch: 20 loss: [9.057664]\n",
"epoch: 21 loss: [11.291857]\n",
"epoch: 22 loss: [11.814135]\n",
"epoch: 23 loss: [8.627268]\n",
"epoch: 24 loss: [10.576701]\n",
"epoch: 25 loss: [9.666995]\n",
"epoch: 26 loss: [6.6638875]\n",
"epoch: 27 loss: [8.0759735]\n",
"epoch: 28 loss: [6.2821198]\n",
"epoch: 29 loss: [9.70084]\n"
]
}
],
"source": [
"run_dcn(dfTrain,dfTest,folds,params)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
import tensorflow as tf
import numpy as np
from time import time
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.metrics import roc_auc_score
class DCN(BaseEstimator, TransformerMixin):
def __init__(self, cate_feature_size,field_size,numeric_feature_size,embedding_size=8,
deep_layers=[32,32],dropout_deep=[0.5,0.5,0.5],
deep_layers_activation=tf.nn.relu,epoch=10,batch_size=256,
learning_rate=0.01,optimizer_type='adam',
verbose=False,random_seed=2018,loss_type='logloss',
eval_metric=roc_auc_score,l2_reg=0.0,cross_layer_num=3):
assert loss_type in ["logloss", "rmse"], \
"'logloss' for classification or 'rmse' for regression"
self.cate_feature_size = cate_feature_size
self.numeric_feature_size = numeric_feature_size
self.field_size = field_size
self.embedding_size = embedding_size
self.total_size = self.field_size * self.embedding_size + self.numeric_feature_size
self.deep_layers = deep_layers
self.cross_layer_num = cross_layer_num
self.dropout_deep = dropout_deep
self.deep_layers_activation = deep_layers_activation
self.l2_reg = l2_reg
self.epoch = epoch
self.batch_size = batch_size
self.learning_rate = learning_rate
self.optimizer_type = optimizer_type
self.verbose = verbose
self.random_seed = random_seed
self.loss_type = loss_type
self.eval_metric = eval_metric
self.train_result,self.valid_result = [],[]
self._init_graph()
def _init_graph(self):
self.graph = tf.Graph()
with self.graph.as_default():
tf.set_random_seed(self.random_seed)
self.feat_index = tf.placeholder(tf.int32, shape=[None,None], name='feat_index')
self.feat_value = tf.placeholder(tf.float32, shape=[None,None], name='feat_value')
self.numeric_value = tf.placeholder(tf.float32, shape=[None,None],name='num_value')
self.label = tf.placeholder(tf.float32,shape=[None,1],name='label')
self.dropout_keep_deep = tf.placeholder(tf.float32,shape=[None],name='dropout_keep_deep')
self.train_phase = tf.placeholder(tf.bool,name='train_phase')
self.weights = self._initialize_weights()
# model
self.embeddings = tf.nn.embedding_lookup(self.weights['feature_embeddings'],self.feat_index) # N * F * K
feat_value = tf.reshape(self.feat_value, shape=[-1,self.field_size,1])
self.embeddings = tf.multiply(self.embeddings,feat_value)
self.x0 = tf.concat([self.numeric_value,tf.reshape(self.embeddings,shape=[-1,self.field_size * self.embedding_size])], axis=1)
# deep network
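# The deep part is a plain MLP on x0 (numeric values concatenated with the
# flattened embeddings, shape N x total_size), with dropout applied to the
# input and after every hidden layer.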
self.y_deep = tf.nn.dropout(self.x0,self.dropout_keep_deep[0])
for i in range(len(self.deep_layers)):
self.y_deep = tf.add(tf.matmul(self.y_deep,self.weights['deep_layer_%d' % i]), self.weights['deep_bias_%d' % i])
self.y_deep = self.deep_layers_activation(self.y_deep)
self.y_deep = tf.nn.dropout(self.y_deep,self.dropout_keep_deep[i+1])
# cross network
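# Cross layer recurrence from the DCN paper: x_{l+1} = x0 * x_l^T * w_l + b_l + x_l,
# where x0, x_l, w_l and b_l are all (total_size, 1) per sample.
# tf.matmul(x0, x_l, transpose_b=True) forms the (total_size x total_size) outer product,
# and tf.tensordot(..., w_l, 1) contracts it back down to (total_size, 1).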
self._x0 = tf.reshape(self.x0,(-1,self.total_size,1))
x_l = self._x0
for l in range(self.cross_layer_num):
x_l = tf.tensordot(tf.matmul(self._x0,x_l, transpose_b=True),
self.weights['cross_layer_%d' % l],1) + self.weights['cross_bias_%d' % l] + x_l
self.cross_network_out = tf.reshape(x_l,(-1,self.total_size))
# concat layer
concat_input = tf.concat([self.cross_network_out, self.y_deep], axis=1)
self.out = tf.add(tf.matmul(concat_input,self.weights['concat_projection']),self.weights['concat_bias'])
# loss
if self.loss_type == 'logloss':
self.out = tf.nn.sigmoid(self.out)
self.loss = tf.losses.log_loss(self.label,self.out)
elif self.loss_type == 'rmse':
self.loss = tf.sqrt(tf.losses.mean_squared_error(self.label,self.out))
# l2_reg
if self.l2_reg > 0:
self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['concat_projection'])
for i in range(len(self.deep_layers)):
self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['deep_layer_%d' % i])
for i in range(self.cross_layer_num):
self.loss += tf.contrib.layers.l2_regularizer(self.l2_reg)(self.weights['cross_layer_%d' % i])
# optimization
if self.optimizer_type == 'adam':
self.optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,beta1=0.9,beta2=0.999,epsilon=1e-8).minimize(self.loss)
elif self.optimizer_type == 'adagrad':
self.optimizer = tf.train.AdagradOptimizer(learning_rate=self.learning_rate, initial_accumulator_value=1e-8).minimize(self.loss)
elif self.optimizer_type == 'gd':
self.optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.learning_rate).minimize(self.loss)
elif self.optimizer_type == 'momentum':
self.optimizer = tf.train.MomentumOptimizer(learning_rate=self.learning_rate, momentum=0.95).minimize(self.loss)
# init
self.saver = tf.train.Saver()
init = tf.global_variables_initializer()
self.sess = tf.Session()
self.sess.run(init)
# number of params
total_parameters = 0
for v in self.weights.values():
shape = v.get_shape()
value_params = 1
for dim in shape:
value_params *= dim.value
total_parameters += value_params
if self.verbose > 0:
print('Params: %d' % total_parameters)
def _initialize_weights(self):
weights = dict()
#embedding
weights['feature_embeddings'] = tf.Variable(tf.random_normal([self.cate_feature_size,self.embedding_size],0.0,0.01),name='feature_embeddings')
weights['feature_bias'] = tf.Variable(tf.random_normal([self.cate_feature_size,1],0.0,1.0),name='feature_bias')
# deep network
num_layer = len(self.deep_layers)
glorot = np.sqrt(2.0 / (self.total_size + self.deep_layers[0]))
weights['deep_layer_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,self.deep_layers[0])),dtype=np.float32)
weights['deep_bias_0'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[0])),dtype=np.float32)
for i in range(1, num_layer):
glorot = np.sqrt(2.0 / (self.total_size + self.deep_layers[i]))
# size = layers[i-1] * layers[i]
weights['deep_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.deep_layers[i-1],self.deep_layers[i])),dtype=np.float32)
#size = 1 * layers[i]
weights['deep_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(1,self.deep_layers[i])),dtype=np.float32)
# cross network
for i in range(self.cross_layer_num):
weights['cross_layer_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)), dtype=np.float32)
weights['cross_bias_%d' % i] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(self.total_size,1)), dtype=np.float32)
# Concat layers
input_size = self.total_size + self.deep_layers[-1]
glorot = np.sqrt(2.0 / (input_size + 1))
weights['concat_projection'] = tf.Variable(np.random.normal(loc=0,scale=glorot,size=(input_size,1)),dtype=np.float32)
weights['concat_bias'] = tf.Variable(tf.constant(0.01), dtype=np.float32)
return weights
def get_batch(self,Xi,Xv,Xv2,y,batch_size, index):
start = index * batch_size
end = (index + 1) * batch_size
end = end if end < len(y) else len(y)
return Xi[start:end], Xv[start:end],Xv2[start:end],[[y_] for y_ in y[start:end]]
# shuffle four lists simultaneously (reuse the RNG state so rows stay aligned)
def shuffle_in_unison_scary(self,a,b,c,d):
rng_state = np.random.get_state()
np.random.shuffle(a)
np.random.set_state(rng_state)
np.random.shuffle(b)
np.random.set_state(rng_state)
np.random.shuffle(c)
np.random.set_state(rng_state)
np.random.shuffle(d)
def predict(self,Xi,Xv,Xv2,y):
feed_dict = {self.feat_index: Xi,
self.feat_value: Xv,
self.numeric_value: Xv2,
self.label: y,
self.dropout_keep_deep: [1.0] * len(self.dropout_deep),
self.train_phase: False} # evaluation, not training
loss = self.sess.run([self.loss], feed_dict=feed_dict)
return loss
def fit_on_batch(self,Xi,Xv,Xv2,y):
feed_dict = {self.feat_index: Xi,
self.feat_value: Xv,
self.numeric_value: Xv2,
self.label: y,
self.dropout_keep_deep: self.dropout_deep, # apply the configured dropout during training
self.train_phase: True}
loss, opt = self.sess.run([self.loss,self.optimizer],feed_dict=feed_dict)
return loss
def fit(self,cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train,
cate_Xi_valid=None,cate_Xv_valid=None,numeric_Xv_valid=None,y_valid=None,
early_stopping=False,refit=False):
"""
:Xi_train: feature index of feature field of sample in the training set
:Xv_train: feature value of feature field of sample in the training set; can be either binary or float
:y_train: label of each sample in the training set
:Xi_valid: feature indices of each sample in the validation set
:Xv_valid: feature values of each sample in the validation set
:y_valid: label of each sample in the validation set
:early_stopping: early stopping or not
:refit: refit the model on the train+valid dataset or not
"""
print(len(cate_Xi_train))
print(len(cate_Xv_train))
print(len(numeric_Xv_train))
print(len(y_train))
has_valid = cate_Xv_valid is not None
for epoch in range(self.epoch):
t1 = time()
self.shuffle_in_unison_scary(cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train)
total_batch = int(len(y_train) / self.batch_size)
for i in range(total_batch):
cate_Xi_batch, cate_Xv_batch, numeric_Xv_batch, y_batch = self.get_batch(cate_Xi_train,cate_Xv_train,numeric_Xv_train,y_train,self.batch_size,i)
self.fit_on_batch(cate_Xi_batch,cate_Xv_batch,numeric_Xv_batch,y_batch)
if has_valid:
y_valid = np.array(y_valid).reshape((-1,1))
loss = self.predict(cate_Xi_valid,cate_Xv_valid,numeric_Xv_valid,y_valid)
print('epoch: ',epoch, 'loss:',loss)
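# Minimal smoke-test sketch on synthetic data (not part of the original demo);
# the shapes and hyper-parameters below are arbitrary and only meant to show the
# expected input format: lists of per-sample categorical indices, categorical
# values, numeric values and labels.
if __name__ == '__main__':
    n_samples, field_size, numeric_size, cate_size = 512, 3, 2, 10
    cate_Xi = np.random.randint(0, cate_size, size=(n_samples, field_size)).tolist()
    cate_Xv = np.ones((n_samples, field_size)).tolist()
    numeric_Xv = np.random.rand(n_samples, numeric_size).tolist()
    y = np.random.randint(0, 2, size=n_samples).tolist()
    dcn = DCN(cate_feature_size=cate_size, field_size=field_size,
              numeric_feature_size=numeric_size, embedding_size=4,
              deep_layers=[8, 8], epoch=2, batch_size=64, verbose=True)
    # reuse the training data as a dummy validation set just to exercise predict()
    dcn.fit(cate_Xi, cate_Xv, numeric_Xv, y, cate_Xi, cate_Xv, numeric_Xv, y)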