house_price.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
################################################################################
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
################################################################################


"""
File: nets/house_price/house_price.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import six
import math
import numpy as np

import paddle.fluid as fluid

from nets.base_net import BaseNet
from datasets.house_price.baseline_sklearn import CityInfo


class HousePrice(BaseNet):
    """
    net class: construct net
    """
    def __init__(self, FLAGS):
        """
        init the base net with FLAGS, then build the CityInfo of FLAGS.city_name
        """
        super(HousePrice, self).__init__(FLAGS)
        city_name = FLAGS.city_name
        self.city_info = CityInfo(city_name)
        
    def emb_lookup_fn(self, input, dict_dim, emb_dim, layer_name, FLAGS,
            padding_idx=None, init_val=0.0):
        """
        look up `input` in a sparse embedding table

        Args:
            input: variable passed as the ids of fluid.layers.embedding
            dict_dim: first dimension of the embedding table
            emb_dim: second dimension of the embedding table
            layer_name: name of the embedding parameter
            FLAGS: not used by this method
            padding_idx: forwarded to fluid.layers.embedding
            init_val: constant every table entry is initialized with
        Returns:
            the output variable of fluid.layers.embedding
        """
        table_init = fluid.initializer.ConstantInitializer(init_val)
        table_attr = fluid.ParamAttr(name=layer_name, initializer=table_init)
        return fluid.layers.embedding(
            input=input,
            size=[dict_dim, emb_dim],
            is_sparse=True,
            padding_idx=padding_idx,
            param_attr=table_attr)
 
    def fc_fn(self, input, output_size, act, layer_name, FLAGS, num_flatten_dims=1):
        """
        pack fc op

        Args:
            input: input variable of the fc layer
            output_size: size of the fc output
            act: activation forwarded to fluid.layers.fc (None for no activation)
            layer_name: prefix of the parameter names; the weight is named
                layer_name + "_fc_w" and the bias layer_name + "_fc_bias"
            FLAGS: not used by this method; kept so existing callers still work
            num_flatten_dims: forwarded to fluid.layers.fc
        Returns:
            the output variable of fluid.layers.fc
        """
        # weight: Xavier init with uniform=False; bias: constant zero
        weight_attr = fluid.ParamAttr(
            name=layer_name + "_fc_w",
            initializer=fluid.initializer.Xavier(uniform=False))
        bias_attr = fluid.ParamAttr(
            name=layer_name + "_fc_bias",
            initializer=fluid.initializer.Constant(value=0.0))
        return fluid.layers.fc(
            input=input,
            size=output_size,
            num_flatten_dims=num_flatten_dims,
            param_attr=weight_attr,
            bias_attr=bias_attr,
            act=act)
 
    def pred_format(self, result, **kwargs):
        """
            format pred output
        """
        if result is None or result in ['_PRE_']:
            return

        def _softmax(x):
            return np.exp(x) / np.sum(np.exp(x), axis=0)

        if result == '_POST_':
            h_attr_w = fluid.global_scope().find_var("house_self_fc_w").get_tensor()
            h_attr_b = fluid.global_scope().find_var("house_self_fc_bias").get_tensor()
            dis_w = fluid.global_scope().find_var("dis_w").get_tensor()
            bids = fluid.global_scope().find_var("bids").get_tensor()
            print("h_attr_w: %s" % (" ".join(map(str, _softmax(np.array(h_attr_w).flatten())))))
            print("h_attr_b: %s" % (" ".join(map(str, np.array(h_attr_b)))))
            print("dis_w: %s" % (" ".join(map(str, _softmax(np.array(np.mean(dis_w, 0)))))))
            print("bids: %s" % (" ".join(map(str, np.array(bids).flatten()))))
            return

        label = np.array(result[0]).T.flatten().tolist()
        pred = np.array(result[1]).T.flatten().tolist()
        for i in range(len(pred)):
            print("qid\t%s\t%s" % (label[i], pred[i]))

    def net(self, inputs):
        """
        user-defined interface: build the house price prediction graph

        The prediction is house_self (sigmoid fc output over the house's own
        features) multiplied by a softmax-weighted sum of the nearby house
        prices and the learned public poi prices.

        Args:
            inputs: feature dict, {"label": xxx, "public_bid": xxx, ...}.
                Keys read here: label, public_bid, house_business,
                house_price, house_dis, public_dis, house_num, public_num,
                and, only when FLAGS.with_house_attr is set, house_wuye,
                house_kfs, house_age, house_lou.
        Returns:
            dict {"debug_output": {...}, "model_output": {...}} where
            model_output holds 'feeded_var_names', 'fetch_targets'
            ([label, pred]) and 'loss', and debug_output holds 'loss'.
        """
        FLAGS = self._flags

        label = inputs['label']
        public_bids = inputs['public_bid']

        max_house_num = FLAGS.max_house_num
        max_public_num = FLAGS.max_public_num
        neighbor_num = max_house_num + max_public_num
        # On python3 dict.keys() is a view that has no remove(); copy it into
        # a list so it can be filtered below and exported as a plain list.
        pred_keys = list(inputs.keys())

        #step1. get house self feature
        if FLAGS.with_house_attr:
            def _get_house_attr(name, attr_vec_size):
                # one fc (no activation, output size 1) per house attribute
                h_onehot = fluid.layers.reshape(inputs[name], [-1, attr_vec_size])
                return self.fc_fn(h_onehot, 1, act=None, layer_name=name, FLAGS=FLAGS)

            house_attr_sizes = [
                ("house_business", self.city_info.business_num),
                ("house_wuye", self.city_info.wuye_num),
                ("house_kfs", self.city_info.kfs_num),
                ("house_age", self.city_info.age_num),
                ("house_lou", self.city_info.lou_num),
            ]
            house_vec = fluid.layers.concat(
                [_get_house_attr(name, size) for name, size in house_attr_sizes], 1)
        else:
            #no house attr: only house_business is used, so the other house
            #attribute inputs are dropped from the exported feed names
            house_vec = fluid.layers.reshape(inputs["house_business"],
                    [-1, self.city_info.business_num])
            unused_keys = ('house_wuye', 'house_kfs', 'house_age', 'house_lou')
            pred_keys = [key for key in pred_keys if key not in unused_keys]

        house_self = self.fc_fn(house_vec, 1, act='sigmoid', layer_name='house_self', FLAGS=FLAGS)
        house_self = fluid.layers.reshape(house_self, [-1, 1])

        #step2. get nearby house and public poi feature
        #public poi embeddings matrix: one value per public poi id,
        #initialized with the city average price
        bid_embed = self.emb_lookup_fn(public_bids, self.city_info.public_num, 1, 'bids', FLAGS, None,
                self.city_info.average_price)

        dis_dim = 1 #only line dis
        if FLAGS.with_car_dis:
            dis_dim = 2 #add car drive dis

        #nearby house and public poi distance weight matrix
        dis_w = fluid.layers.create_parameter(shape=[neighbor_num, dis_dim],
                dtype='float32', name='dis_w')
        house_price = inputs['house_price']
        public_price = fluid.layers.reshape(bid_embed, [-1, max_public_num])
        #nearby price: houses first, then public pois
        #(assumes house_price is [-1, max_house_num] - TODO confirm with the reader)
        price_vec = fluid.layers.concat([house_price, public_price], 1)

        #nearby price weight
        house_dis = fluid.layers.reshape(inputs['house_dis'], [-1, max_house_num, dis_dim])
        public_dis = fluid.layers.reshape(inputs['public_dis'], [-1, max_public_num, dis_dim])
        dis_vec = fluid.layers.concat([house_dis, public_dis], 1)
        dis_w = fluid.layers.reshape(dis_w, [neighbor_num, dis_dim])
        #weight every distance, then sum over the distance dimension
        dis_vec = fluid.layers.reduce_sum(dis_vec * dis_w, 2)
        house_mask = fluid.layers.sequence_mask(fluid.layers.reshape(inputs['house_num'], [-1]),
                max_house_num) #remove padded
        public_mask = fluid.layers.sequence_mask(fluid.layers.reshape(inputs['public_num'], [-1]),
                max_public_num) #remove padded
        combine_mask = fluid.layers.cast(x=fluid.layers.concat([house_mask, public_mask], 1),
                dtype="float32")
        adder = (1.0 - combine_mask) * -10000.0
        # Since we are adding it to the raw scores before the softmax, this is
        # effectively the same as removing these entirely.
        dis_vec += adder
        price_weight = fluid.layers.softmax(dis_vec)

        combine_price = price_vec * price_weight

        #step3. merge house_self and nearby house and public price: [-1, 1] * [-1, 1]
        pred = house_self * fluid.layers.unsqueeze(fluid.layers.reduce_sum(combine_price, 1), [1])

        loss = fluid.layers.square_error_cost(input=pred, label=label)
        avg_cost = fluid.layers.mean(loss)

        # debug output info during training
        debug_output = {'loss': avg_cost}
        model_output = {
            'feeded_var_names': pred_keys,
            'fetch_targets': [label, pred],
            'loss': avg_cost,
        }
        return {"debug_output": debug_output,
                "model_output": model_output}