{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Custom components\n", "\n", "As I mentioned earlier in the example notebooks, and also in the `README`, it is possible to customise almost every component in `pytorch-widedeep`.\n", "\n", "Let's now go through a couple of simple examples to illustrate how that could be done. \n", "\n", "First let's load and process the data \"as usual\", let's start with a regression and the [airbnb](http://insideairbnb.com/get-the-data.html) dataset." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import os\n", "import torch\n", "\n", "from pytorch_widedeep import Trainer\n", "from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor, TextPreprocessor, ImagePreprocessor\n", "from pytorch_widedeep.models import Wide, TabMlp, TabResnet, DeepText, DeepImage, WideDeep\n", "from pytorch_widedeep.losses import RMSELoss\n", "from pytorch_widedeep.initializers import *\n", "from pytorch_widedeep.callbacks import *" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idhost_iddescriptionhost_listings_counthost_identity_verifiedneighbourhood_cleansedlatitudelongitudeis_location_exactproperty_typeroom_typeaccommodatesbathroomsbedroomsbedsguests_includedminimum_nightsinstant_bookablecancellation_policyhas_house_ruleshost_genderaccommodates_catgguests_included_catgminimum_nights_catghost_listings_count_catgbathrooms_catgbedrooms_catgbeds_catgamenity_24-hour_check-inamenity__toiletamenity_accessible-height_bedamenity_accessible-height_toiletamenity_air_conditioningamenity_air_purifieramenity_alfresco_bathtubamenity_amazon_echoamenity_baby_bathamenity_baby_monitoramenity_babysitter_recommendationsamenity_balconyamenity_bath_towelamenity_bathroom_essentialsamenity_bathtubamenity_bathtub_with_bath_chairamenity_bbq_grillamenity_beach_essentialsamenity_beach_viewamenity_beachfrontamenity_bed_linensamenity_bedroom_comforts...amenity_roll-in_showeramenity_room-darkening_shadesamenity_safety_cardamenity_saunaamenity_self_check-inamenity_shampooamenity_shared_gymamenity_shared_hot_tubamenity_shared_poolamenity_shower_chairamenity_single_level_homeamenity_ski-in_ski-outamenity_smart_lockamenity_smart_tvamenity_smoke_detectoramenity_smoking_allowedamenity_soaking_tubamenity_sound_systemamenity_stair_gatesamenity_stand_alone_steam_showeramenity_standing_valetamenity_steam_ovenamenity_stoveamenity_suitable_for_eventsamenity_sun_loungersamenity_table_corner_guardsamenity_tennis_courtamenity_terraceamenity_toilet_paperamenity_touchless_faucetsamenity_tvamenity_walk-in_showeramenity_warming_draweramenity_washeramenity_washer_dryeramenity_waterfrontamenity_well-lit_path_to_entranceamenity_wheelchair_accessibleamenity_wide_clearance_to_showeramenity_wide_doorway_to_guest_bathroomamenity_wide_entranceamenity_wide_entrance_for_guestsamenity_wide_entrywayamenity_wide_hallwaysamenity_wifiamenity_window_guardsamenity_wine_coolersecurity_depositextra_peopleyield
013913.jpg54730My bright double bedroom with a large window has a relaxed feeling! It comfortably fits one or t...4.0fIslington51.56802-0.11121tapartmentprivate_room21.01.00.011fmoderate1female21131100011000000100011000010...11000100000000110000001000000010010000011000100100.015.012.00
115400.jpg60302Lots of windows and light. St Luke's Gardens are at the end of the block, and the river not too...1.0tKensington and Chelsea51.48796-0.16898tapartmententire_home/apt21.01.01.023fstrict_14_with_grace_period1female22311111000100000000000000000...00000100000000100000000000000010010000000000100150.00.0109.50
217402.jpg67564Open from June 2018 after a 3-year break, we are delighted to be welcoming guests again to this ...19.0tWestminster51.52098-0.14002tapartmententire_home/apt62.03.03.043tstrict_14_with_grace_period1female33332330000000000000000000010...00001100000000100000001000000010010000000000100350.010.0149.65
324328.jpg41759Artist house, bright high ceiling rooms, private parking and a communal garden in a conservation...2.0tWandsworth51.47298-0.16376totherentire_home/apt21.51.01.0230fmoderate1male22322111000000000000000000000...00001100000000100000000000000010010000000000100250.00.0215.60
425023.jpg102813Large, all comforts, 2-bed flat; first floor; lift; pretty communal gardens + off-street parking...1.0fWandsworth51.44687-0.21874tapartmententire_home/apt41.02.02.024fmoderate1female32311220000000000000000000000...00000000000000100000000000000010010000000000100250.011.079.35
\n", "

5 rows × 223 columns

\n", "
" ], "text/plain": [ " id host_id \\\n", "0 13913.jpg 54730 \n", "1 15400.jpg 60302 \n", "2 17402.jpg 67564 \n", "3 24328.jpg 41759 \n", "4 25023.jpg 102813 \n", "\n", " description \\\n", "0 My bright double bedroom with a large window has a relaxed feeling! It comfortably fits one or t... \n", "1 Lots of windows and light. St Luke's Gardens are at the end of the block, and the river not too... \n", "2 Open from June 2018 after a 3-year break, we are delighted to be welcoming guests again to this ... \n", "3 Artist house, bright high ceiling rooms, private parking and a communal garden in a conservation... \n", "4 Large, all comforts, 2-bed flat; first floor; lift; pretty communal gardens + off-street parking... \n", "\n", " host_listings_count host_identity_verified neighbourhood_cleansed \\\n", "0 4.0 f Islington \n", "1 1.0 t Kensington and Chelsea \n", "2 19.0 t Westminster \n", "3 2.0 t Wandsworth \n", "4 1.0 f Wandsworth \n", "\n", " latitude longitude is_location_exact property_type room_type \\\n", "0 51.56802 -0.11121 t apartment private_room \n", "1 51.48796 -0.16898 t apartment entire_home/apt \n", "2 51.52098 -0.14002 t apartment entire_home/apt \n", "3 51.47298 -0.16376 t other entire_home/apt \n", "4 51.44687 -0.21874 t apartment entire_home/apt \n", "\n", " accommodates bathrooms bedrooms beds guests_included minimum_nights \\\n", "0 2 1.0 1.0 0.0 1 1 \n", "1 2 1.0 1.0 1.0 2 3 \n", "2 6 2.0 3.0 3.0 4 3 \n", "3 2 1.5 1.0 1.0 2 30 \n", "4 4 1.0 2.0 2.0 2 4 \n", "\n", " instant_bookable cancellation_policy has_house_rules host_gender \\\n", "0 f moderate 1 female \n", "1 f strict_14_with_grace_period 1 female \n", "2 t strict_14_with_grace_period 1 female \n", "3 f moderate 1 male \n", "4 f moderate 1 female \n", "\n", " accommodates_catg guests_included_catg minimum_nights_catg \\\n", "0 2 1 1 \n", "1 2 2 3 \n", "2 3 3 3 \n", "3 2 2 3 \n", "4 3 2 3 \n", "\n", " host_listings_count_catg bathrooms_catg bedrooms_catg beds_catg \\\n", "0 3 1 1 0 \n", "1 1 
1 1 1 \n", "2 3 2 3 3 \n", "3 2 2 1 1 \n", "4 1 1 2 2 \n", "\n", " amenity_24-hour_check-in amenity__toilet amenity_accessible-height_bed \\\n", "0 0 0 1 \n", "1 1 0 0 \n", "2 0 0 0 \n", "3 1 0 0 \n", "4 0 0 0 \n", "\n", " amenity_accessible-height_toilet amenity_air_conditioning \\\n", "0 1 0 \n", "1 0 1 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_air_purifier amenity_alfresco_bathtub amenity_amazon_echo \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_baby_bath amenity_baby_monitor \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_babysitter_recommendations amenity_balcony amenity_bath_towel \\\n", "0 1 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_bathroom_essentials amenity_bathtub \\\n", "0 0 1 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_bathtub_with_bath_chair amenity_bbq_grill \\\n", "0 1 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_beach_essentials amenity_beach_view amenity_beachfront \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_bed_linens amenity_bedroom_comforts ... amenity_roll-in_shower \\\n", "0 1 0 ... 1 \n", "1 0 0 ... 0 \n", "2 1 0 ... 0 \n", "3 0 0 ... 0 \n", "4 0 0 ... 
0 \n", "\n", " amenity_room-darkening_shades amenity_safety_card amenity_sauna \\\n", "0 1 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_self_check-in amenity_shampoo amenity_shared_gym \\\n", "0 0 1 0 \n", "1 0 1 0 \n", "2 1 1 0 \n", "3 1 1 0 \n", "4 0 0 0 \n", "\n", " amenity_shared_hot_tub amenity_shared_pool amenity_shower_chair \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_single_level_home amenity_ski-in_ski-out amenity_smart_lock \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_smart_tv amenity_smoke_detector amenity_smoking_allowed \\\n", "0 0 1 1 \n", "1 0 1 0 \n", "2 0 1 0 \n", "3 0 1 0 \n", "4 0 1 0 \n", "\n", " amenity_soaking_tub amenity_sound_system amenity_stair_gates \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_stand_alone_steam_shower amenity_standing_valet \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_steam_oven amenity_stove amenity_suitable_for_events \\\n", "0 0 1 0 \n", "1 0 0 0 \n", "2 0 1 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_sun_loungers amenity_table_corner_guards amenity_tennis_court \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_terrace amenity_toilet_paper amenity_touchless_faucets \\\n", "0 0 0 0 \n", "1 0 0 0 \n", "2 0 0 0 \n", "3 0 0 0 \n", "4 0 0 0 \n", "\n", " amenity_tv amenity_walk-in_shower amenity_warming_drawer amenity_washer \\\n", "0 1 0 0 1 \n", "1 1 0 0 1 \n", "2 1 0 0 1 \n", "3 1 0 0 1 \n", "4 1 0 0 1 \n", "\n", " amenity_washer_dryer amenity_waterfront \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_well-lit_path_to_entrance amenity_wheelchair_accessible \\\n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_wide_clearance_to_shower amenity_wide_doorway_to_guest_bathroom \\\n", "0 0 1 \n", "1 0 
0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_wide_entrance amenity_wide_entrance_for_guests \\\n", "0 1 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "\n", " amenity_wide_entryway amenity_wide_hallways amenity_wifi \\\n", "0 0 0 1 \n", "1 0 0 1 \n", "2 0 0 1 \n", "3 0 0 1 \n", "4 0 0 1 \n", "\n", " amenity_window_guards amenity_wine_cooler security_deposit extra_people \\\n", "0 0 0 100.0 15.0 \n", "1 0 0 150.0 0.0 \n", "2 0 0 350.0 10.0 \n", "3 0 0 250.0 0.0 \n", "4 0 0 250.0 11.0 \n", "\n", " yield \n", "0 12.00 \n", "1 109.50 \n", "2 149.65 \n", "3 215.60 \n", "4 79.35 \n", "\n", "[5 rows x 223 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('data/airbnb/airbnb_sample.csv')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# There are a number of columns that are already binary. Therefore, no need to one hot encode them\n", "crossed_cols = [('property_type', 'room_type')]\n", "already_dummies = [c for c in df.columns if 'amenity' in c] + ['has_house_rules']\n", "wide_cols = ['is_location_exact', 'property_type', 'room_type', 'host_gender',\n", "'instant_bookable'] + already_dummies\n", "cat_embed_cols = [(c, 16) for c in df.columns if 'catg' in c] + \\\n", " [('neighbourhood_cleansed', 64), ('cancellation_policy', 16)]\n", "continuous_cols = ['latitude', 'longitude', 'security_deposit', 'extra_people']\n", "# it does not make sense to standarised Latitude and Longitude\n", "already_standard = ['latitude', 'longitude']\n", "# text and image colnames\n", "text_col = 'description'\n", "img_col = 'id'\n", "# path to pretrained word embeddings and the images\n", "word_vectors_path = 'data/glove.6B/glove.6B.100d.txt'\n", "img_path = 'data/airbnb/property_picture'\n", "# target\n", "target_col = 'yield'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "target = 
df[target_col].values" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The vocabulary contains 2192 tokens\n", "Indexing word vectors...\n", "Loaded 400000 word vectors\n", "Preparing embeddings matrix...\n", "2175 words in the vocabulary had data/glove.6B/glove.6B.100d.txt vectors and appear more than 5 times\n", "Reading Images from data/airbnb/property_picture\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " 4%|▍ | 42/1001 [00:00<00:02, 414.31it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Resizing\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 1001/1001 [00:02<00:00, 387.88it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Computing normalisation metrics\n" ] } ], "source": [ "wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\n", "X_wide = wide_preprocessor.fit_transform(df)\n", "\n", "tab_preprocessor = TabPreprocessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols)\n", "X_tab = tab_preprocessor.fit_transform(df)\n", "\n", "text_preprocessor = TextPreprocessor(word_vectors_path=word_vectors_path, text_col=text_col)\n", "X_text = text_preprocessor.fit_transform(df)\n", "\n", "image_processor = ImagePreprocessor(img_col = img_col, img_path = img_path)\n", "X_images = image_processor.fit_transform(df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we are ready to build a wide and deep model. Three of the four components we will use are included in this package, and they will be combined with a custom `deeptext` component. 
Then the fit process will run with a custom loss function.\n", "\n", "Let's have a look" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Linear model\n", "wide = Wide(wide_dim=np.unique(X_wide).shape[0], pred_dim=1)\n", "\n", "# DeepDense: 2 Dense layers\n", "deeptabular = TabMlp(\n", " column_idx = tab_preprocessor.column_idx,\n", " mlp_hidden_dims=[128,64],\n", " mlp_dropout = 0.1,\n", " mlp_batchnorm = True,\n", " embed_input=tab_preprocessor.embeddings_input,\n", " embed_dropout = 0.1,\n", " continuous_cols = continuous_cols,\n", " batchnorm_cont = True\n", ")\n", " \n", "# Pretrained Resnet 18 (default is all but last 2 conv blocks frozen) plus a FC-Head 512->256->128\n", "deepimage = DeepImage(pretrained=True, head_hidden_dims=[512, 256, 128])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Custom `deeptext`\n", "\n", "Standard Pytorch model" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "class MyDeepText(nn.Module):\n", " def __init__(self, vocab_size, padding_idx=1, embed_dim=100, hidden_dim=64):\n", " super(MyDeepText, self).__init__()\n", "\n", " # word/token embeddings\n", " self.word_embed = nn.Embedding(\n", " vocab_size, embed_dim, padding_idx=padding_idx\n", " )\n", "\n", " # stack of RNNs\n", " self.rnn = nn.GRU(\n", " embed_dim,\n", " hidden_dim,\n", " num_layers=2,\n", " bidirectional=True,\n", " batch_first=True,\n", " )\n", "\n", " # Remember, this must be defined. 
If not WideDeep will throw an error\n", "        self.output_dim = hidden_dim * 2\n", "\n", "    def forward(self, X):\n", "        embed = self.word_embed(X.long())\n", "        o, h = self.rnn(embed)\n", "        return torch.cat((h[-2], h[-1]), dim=1)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "mydeeptext = MyDeepText(vocab_size=len(text_preprocessor.vocab.itos))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "model = WideDeep(wide=wide, deeptabular=deeptabular, deeptext=mydeeptext, deepimage=deepimage)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Custom loss function\n", "\n", "Loss functions must simply inherit pytorch's `nn.Module`. For example, let's say we want to use `RMSE` (note that this is already available in the package, but I will pass it here as a custom loss for illustration purposes)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "class RMSELoss(nn.Module):\n", "    def __init__(self):\n", "        \"\"\"root mean squared error\"\"\"\n", "        super().__init__()\n", "        self.mse = nn.MSELoss()\n", "\n", "    def forward(self, input: Tensor, target: Tensor) -> Tensor:\n", "        return torch.sqrt(self.mse(input, target))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "and now we just instantiate the ``Trainer`` as usual. Needless to say, but this runs with 1000 random observations, so loss and metric values are meaningless. 
This is just an example" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "trainer = Trainer(model, objective='regression', custom_loss_function=RMSELoss())" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "epoch 1: 100%|██████████| 25/25 [02:13<00:00, 5.33s/it, loss=118]\n", "valid: 100%|██████████| 7/7 [00:15<00:00, 2.23s/it, loss=101] \n" ] } ], "source": [ "trainer.fit(X_wide=X_wide, X_tab=X_tab, X_text=X_text, X_img=X_images,\n", " target=target, n_epochs=1, batch_size=32, val_split=0.2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In addition to model components and loss functions, we can also use custom callbacks or custom metrics. The former need to be of type `Callback` and the latter need to be of type `Metric`. See:\n", "\n", "```python\n", "pytorch-widedeep.callbacks\n", "```\n", "and \n", "\n", "```python\n", "pytorch-widedeep.metrics\n", "```\n", "\n", "For this example let me use the adult dataset. Again, we first prepare the data as usual" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducational-nummarital-statusoccupationrelationshipracegendercapital-gaincapital-losshours-per-weeknative-countryincome
025Private22680211th7Never-marriedMachine-op-inspctOwn-childBlackMale0040United-States<=50K
138Private89814HS-grad9Married-civ-spouseFarming-fishingHusbandWhiteMale0050United-States<=50K
228Local-gov336951Assoc-acdm12Married-civ-spouseProtective-servHusbandWhiteMale0040United-States>50K
344Private160323Some-college10Married-civ-spouseMachine-op-inspctHusbandBlackMale7688040United-States>50K
418?103497Some-college10Never-married?Own-childWhiteFemale0030United-States<=50K
\n", "
" ], "text/plain": [ " age workclass fnlwgt education educational-num marital-status \\\n", "0 25 Private 226802 11th 7 Never-married \n", "1 38 Private 89814 HS-grad 9 Married-civ-spouse \n", "2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse \n", "3 44 Private 160323 Some-college 10 Married-civ-spouse \n", "4 18 ? 103497 Some-college 10 Never-married \n", "\n", " occupation relationship race gender capital-gain capital-loss \\\n", "0 Machine-op-inspct Own-child Black Male 0 0 \n", "1 Farming-fishing Husband White Male 0 0 \n", "2 Protective-serv Husband White Male 0 0 \n", "3 Machine-op-inspct Husband Black Male 7688 0 \n", "4 ? Own-child White Female 0 0 \n", "\n", " hours-per-week native-country income \n", "0 40 United-States <=50K \n", "1 50 United-States <=50K \n", "2 40 United-States >50K \n", "3 40 United-States >50K \n", "4 30 United-States <=50K " ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.read_csv('data/adult/adult.csv.zip')\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ageworkclassfnlwgteducationeducational_nummarital_statusoccupationrelationshipracegendercapital_gaincapital_losshours_per_weeknative_countryincome_label
025Private22680211th7Never-marriedMachine-op-inspctOwn-childBlackMale0040United-States0
138Private89814HS-grad9Married-civ-spouseFarming-fishingHusbandWhiteMale0050United-States0
228Local-gov336951Assoc-acdm12Married-civ-spouseProtective-servHusbandWhiteMale0040United-States1
344Private160323Some-college10Married-civ-spouseMachine-op-inspctHusbandBlackMale7688040United-States1
418?103497Some-college10Never-married?Own-childWhiteFemale0030United-States0
\n", "
" ], "text/plain": [ " age workclass fnlwgt education educational_num marital_status \\\n", "0 25 Private 226802 11th 7 Never-married \n", "1 38 Private 89814 HS-grad 9 Married-civ-spouse \n", "2 28 Local-gov 336951 Assoc-acdm 12 Married-civ-spouse \n", "3 44 Private 160323 Some-college 10 Married-civ-spouse \n", "4 18 ? 103497 Some-college 10 Never-married \n", "\n", " occupation relationship race gender capital_gain capital_loss \\\n", "0 Machine-op-inspct Own-child Black Male 0 0 \n", "1 Farming-fishing Husband White Male 0 0 \n", "2 Protective-serv Husband White Male 0 0 \n", "3 Machine-op-inspct Husband Black Male 7688 0 \n", "4 ? Own-child White Female 0 0 \n", "\n", " hours_per_week native_country income_label \n", "0 40 United-States 0 \n", "1 50 United-States 0 \n", "2 40 United-States 1 \n", "3 40 United-States 1 \n", "4 30 United-States 0 " ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# For convenience, we'll replace '-' with '_'\n", "df.columns = [c.replace(\"-\", \"_\") for c in df.columns]\n", "# binary target\n", "df['income_label'] = (df[\"income\"].apply(lambda x: \">50K\" in x)).astype(int)\n", "df.drop('income', axis=1, inplace=True)\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "wide_cols = ['education', 'relationship','workclass','occupation','native_country','gender']\n", "crossed_cols = [('education', 'occupation'), ('native_country', 'occupation')]\n", "cat_embed_cols = [('education',16), ('relationship',8), ('workclass',16), ('occupation',16),('native_country',16)]\n", "continuous_cols = [\"age\",\"hours_per_week\"]\n", "target_col = 'income_label'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "# TARGET\n", "target = df[target_col].values\n", "\n", "# wide\n", "wide_preprocessor = WidePreprocessor(wide_cols=wide_cols, crossed_cols=crossed_cols)\n", "X_wide = 
wide_preprocessor.fit_transform(df)\n", "\n", "# deeptabular\n", "tab_preprocessor = TabPreprocessor(embed_cols=cat_embed_cols, continuous_cols=continuous_cols)\n", "X_tab = tab_preprocessor.fit_transform(df)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "wide = Wide(wide_dim=np.unique(X_wide).shape[0], pred_dim=1)\n", "deeptabular = TabMlp(mlp_hidden_dims=[64,32], \n", " column_idx=tab_preprocessor.column_idx,\n", " embed_input=tab_preprocessor.embeddings_input,\n", " continuous_cols=continuous_cols)\n", "model = WideDeep(wide=wide, deeptabular=deeptabular)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Custom metric\n", "\n", "Let's say we want to use our own accuracy metric (again, this is already available in the package, but I will pass it here as a custom loss for illustration purposes). \n", "\n", "This could be done as:" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "from pytorch_widedeep.metrics import Metric" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "class Accuracy(Metric):\n", " def __init__(self, top_k: int = 1):\n", " super(Accuracy, self).__init__()\n", "\n", " self.top_k = top_k\n", " self.correct_count = 0\n", " self.total_count = 0\n", "\n", " # metric name needs to be defined\n", " self._name = \"acc\"\n", "\n", " def reset(self):\n", " self.correct_count = 0\n", " self.total_count = 0\n", "\n", " def __call__(self, y_pred: Tensor, y_true: Tensor) -> float:\n", " num_classes = y_pred.size(1)\n", "\n", " if num_classes == 1:\n", " y_pred = y_pred.round()\n", " y_true = y_true\n", " elif num_classes > 1:\n", " y_pred = y_pred.topk(self.top_k, 1)[1]\n", " y_true = y_true.view(-1, 1).expand_as(y_pred)\n", "\n", " self.correct_count += y_pred.eq(y_true).sum().item()\n", " self.total_count += len(y_pred)\n", " accuracy = float(self.correct_count) / float(self.total_count)\n", " return 
accuracy" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Custom Callback\n", "\n", "Let's code a callback that records the current epoch at the beginning and the end of each epoch (silly, but you know, this is just an example)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# have a look to the class\n", "from pytorch_widedeep.callbacks import Callback" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "class SillyCallback(Callback):\n", " def on_train_begin(self, logs = None):\n", " # recordings will be the trainer object attributes\n", " self.trainer.silly_callback = {}\n", "\n", " self.trainer.silly_callback['beginning'] = []\n", " self.trainer.silly_callback['end'] = []\n", "\n", " def on_epoch_begin(self, epoch, logs=None):\n", " self.trainer.silly_callback['beginning'].append(epoch+1)\n", "\n", " def on_epoch_end(self, epoch, logs=None):\n", " self.trainer.silly_callback['end'].append(epoch+1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "and now, as usual:" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "trainer = Trainer(model, objective='binary', metrics=[Accuracy], callbacks=[SillyCallback])" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "epoch 1: 100%|██████████| 611/611 [00:06<00:00, 92.66it/s, loss=0.397, metrics={'acc': 0.8112}]\n", "valid: 100%|██████████| 153/153 [00:00<00:00, 163.83it/s, loss=0.364, metrics={'acc': 0.8154}]\n", "epoch 2: 100%|██████████| 611/611 [00:06<00:00, 93.55it/s, loss=0.363, metrics={'acc': 0.8289}]\n", "valid: 100%|██████████| 153/153 [00:00<00:00, 167.03it/s, loss=0.356, metrics={'acc': 0.8304}]\n", "epoch 3: 100%|██████████| 611/611 [00:06<00:00, 93.64it/s, loss=0.357, metrics={'acc': 0.8325}]\n", "valid: 100%|██████████| 153/153 [00:00<00:00, 164.14it/s, 
loss=0.35, metrics={'acc': 0.834}] \n", "epoch 4: 100%|██████████| 611/611 [00:06<00:00, 92.59it/s, loss=0.352, metrics={'acc': 0.8347}]\n", "valid: 100%|██████████| 153/153 [00:00<00:00, 171.96it/s, loss=0.349, metrics={'acc': 0.8359}]\n", "epoch 5: 100%|██████████| 611/611 [00:06<00:00, 93.63it/s, loss=0.348, metrics={'acc': 0.8361}]\n", "valid: 100%|██████████| 153/153 [00:00<00:00, 162.69it/s, loss=0.347, metrics={'acc': 0.8372}]\n" ] } ], "source": [ "trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, batch_size=64, val_split=0.2)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'beginning': [1, 2, 3, 4, 5], 'end': [1, 2, 3, 4, 5]}" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.silly_callback" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.7" } }, "nbformat": 4, "nbformat_minor": 4 }