code cleaned + added workaround for "ImportError: libGL.so.1: cannot open...

code cleaned + added workaround for "ImportError: libGL.so.1: cannot open shared object file: No such file or directory":

```
apt-get update
apt-get install ffmpeg libsm6 libxext6 -y
```

code cleaned + added workaround for "ImportError: libGL.so.1: cannot open...
code cleaned + added workaround for "ImportError: libGL.so.1: cannot open shared object file: No such file or directory":

```
apt-get update
apt-get install ffmpeg libsm6 libxext6 -y
```
04d40076 · Pavol Mulinka · ab150000 · 04d40076 · 04d40076 · 04d40076
6 changed files
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -44,6 +44,8 @@ jobs:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
+        apt-get update
+        apt-get install ffmpeg libsm6 libxext6  -y
        python -m pip install --upgrade pip
        python -m pip install pytest-cov codecov .
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

--- a/examples/12_HyperParameter_tuning_w_RayTune.ipynb
+++ b/examples/12_HyperParameter_tuning_w_RayTune.ipynb
@@ -30,7 +30,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -72,7 +72,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
@@ -631,7 +631,7 @@
       "4  0.68 -0.59  2.0 -36.0   -6.9  2.02  0.14 -0.23  "
      ]
     },
-     "execution_count": 6,
+     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -644,7 +644,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
@@ -655,7 +655,7 @@
       "Name: target, dtype: int64"
      ]
     },
-     "execution_count": 7,
+     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -667,7 +667,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -677,7 +677,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -694,7 +694,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -703,7 +703,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -729,7 +729,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -740,7 +740,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
@@ -787,7 +787,7 @@
       ")"
      ]
     },
-     "execution_count": 13,
+     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
@@ -802,7 +802,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -815,7 +815,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -828,19 +828,19 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
-       "== Status ==<br>Memory usage on this node: 2.1/12.2 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/6.69 GiB heap, 0.0/2.29 GiB objects<br>Result logdir: /home/palo/ray_results/_inner_2021-10-15_01-12-33<br>Number of trials: 2/2 (2 TERMINATED)<br><table>\n",
+       "== Status ==<br>Memory usage on this node: 3.8/12.2 GiB<br>Using FIFO scheduling algorithm.<br>Resources requested: 0/8 CPUs, 0/0 GPUs, 0.0/5.86 GiB heap, 0.0/2.0 GiB objects<br>Result logdir: /home/palo/ray_results/_inner_2021-10-17_19-06-33<br>Number of trials: 2/2 (2 TERMINATED)<br><table>\n",
       "<thead>\n",
       "<tr><th>Trial name        </th><th>status    </th><th>loc  </th><th style=\"text-align: right;\">  batch_size</th><th style=\"text-align: right;\">  iter</th><th style=\"text-align: right;\">  total time (s)</th></tr>\n",
       "</thead>\n",
       "<tbody>\n",
-       "<tr><td>_inner_367dc_00000</td><td>TERMINATED</td><td>     </td><td style=\"text-align: right;\">        1000</td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">         14.1115</td></tr>\n",
-       "<tr><td>_inner_367dc_00001</td><td>TERMINATED</td><td>     </td><td style=\"text-align: right;\">        5000</td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">         13.1224</td></tr>\n",
+       "<tr><td>_inner_94413_00000</td><td>TERMINATED</td><td>     </td><td style=\"text-align: right;\">        1000</td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">          16.337</td></tr>\n",
+       "<tr><td>_inner_94413_00001</td><td>TERMINATED</td><td>     </td><td style=\"text-align: right;\">        5000</td><td style=\"text-align: right;\">     5</td><td style=\"text-align: right;\">          15.021</td></tr>\n",
       "</tbody>\n",
       "</table><br><br>"
      ],
@@ -855,7 +855,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "2021-10-15 01:12:50,327\tINFO tune.py:448 -- Total run time: 16.66 seconds (16.61 seconds for the tuning loop).\n"
+      "2021-10-17 19:06:52,530\tINFO tune.py:448 -- Total run time: 21.26 seconds (19.21 seconds for the tuning loop).\n"
     ]
    }
   ],
@@ -882,7 +882,7 @@
    "                  initializers={'deeptabular': XavierNormal},\n",
    "                  optimizers={'deeptabular': deep_opt},\n",
    "                  metrics=[accuracy, precision, recall, f1],\n",
-    "                  verbose=0)\n",
+    "                  verbose=1)\n",
    "\n",
    "    trainer.fit(X_train=X_train,\n",
    "                X_val=X_val,\n",
@@ -900,7 +900,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -909,60 +909,38 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": 14,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "\u001b[2m\u001b[36m(pid=14987)\u001b[0m sys:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/ray/session_2021-10-17_19-06-31_273508_14906/logs/worker-edb8ddf5edcb134690e06916577fafd5d2bb26af-01000000-14987.out' mode='a' encoding='utf-8'>\n",
+      "\u001b[2m\u001b[36m(pid=14987)\u001b[0m sys:1: ResourceWarning: unclosed file <_io.TextIOWrapper name='/tmp/ray/session_2021-10-17_19-06-31_273508_14906/logs/worker-edb8ddf5edcb134690e06916577fafd5d2bb26af-01000000-14987.err' mode='a' encoding='utf-8'>\n"
+     ]
+    }
+   ],
   "source": [
    "%load_ext tensorboard"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 16,
   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Reusing TensorBoard on port 6006 (pid 2541), started 7:22:22 ago. (Use '!kill 2541' to kill it.)"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/html": [
-       "\n",
-       "      <iframe id=\"tensorboard-frame-de12d04f9fd792fb\" width=\"100%\" height=\"800\" frameborder=\"0\">\n",
-       "      </iframe>\n",
-       "      <script>\n",
-       "        (function() {\n",
-       "          const frame = document.getElementById(\"tensorboard-frame-de12d04f9fd792fb\");\n",
-       "          const url = new URL(\"/\", window.location);\n",
-       "          url.port = 6006;\n",
-       "          frame.src = url;\n",
-       "        })();\n",
-       "      </script>\n",
-       "  "
-      ],
-      "text/plain": [
-       "<IPython.core.display.HTML object>"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
+   "outputs": [],
   "source": [
    "%tensorboard --logdir ~/ray_results"
   ]
  }
 ],
 "metadata": {
+  "interpreter": {
+   "hash": "3b99005fd577fa40f3cce433b2b92303885900e634b2b5344c07c59d06c8792d"
+  },
  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
+   "display_name": "Python 3.8.5 64-bit ('base': conda)",
   "name": "python3"
  },
  "language_info": {

--- a/pytorch_widedeep/callbacks.py
+++ b/pytorch_widedeep/callbacks.py
@@ -150,6 +150,7 @@ class History(Callback):
    This callback runs by default within :obj:`Trainer`, therefore, should not
    be passed to the :obj:`Trainer`. Is included here just for completion.
    """
+
    def on_train_begin(self, logs: Optional[Dict] = None):
        self.trainer.history = {}

@@ -158,11 +159,11 @@ class History(Callback):
    ):
        logs = logs or {}
        for k, v in logs.items():
-            if isinstance(v, np.ndarray):# or isinstance(v, list):
+            if isinstance(v, np.ndarray):
                v = v.tolist()
-            if isinstance(v, list) and len(v)>1:
+            if isinstance(v, list) and len(v) > 1:
                for i in range(len(v)):
-                    self.trainer.history.setdefault(k+'_'+str(i), []).append(v[i])
+                    self.trainer.history.setdefault(k + "_" + str(i), []).append(v[i])
            else:
                self.trainer.history.setdefault(k, []).append(v)

@@ -264,10 +265,9 @@ class LRHistory(Callback):
    >>> trainer = Trainer(model, objective="regression", callbacks=[LRHistory(n_epochs=10)])
    """

-    def __init__(self, n_epochs: int, ray_tune: bool = False):
+    def __init__(self, n_epochs: int):
        super(LRHistory, self).__init__()
        self.n_epochs = n_epochs
-        self.ray_tune = ray_tune

    def on_epoch_begin(self, epoch: int, logs: Optional[Dict] = None):
        if epoch == 0 and self.trainer.lr_scheduler is not None:
@@ -671,48 +671,15 @@ class EarlyStopping(Callback):

 class RayTuneReporter(Callback):
    r"""Callback that allows reporting history and lr_history values to RayTune for Hyperparameter tuning
-
-    Parameters
-    -----------
-
-    Attributes
-    ----------
-    
-    Examples
-    --------
-    >>> from pytorch_widedeep.callbacks import RayTuneReporter
-    >>> from pytorch_widedeep.models import TabMlp, Wide, WideDeep
-    >>> from pytorch_widedeep.training import Trainer
-    >>> from ray import tune
-    >>> import tracemalloc
-    >>> tracemalloc.start()
-    >>>
-    >>> config={"batch_size": tune.grid_search([1000, 5000]),}
-    >>> embed_input = [(u, i, j) for u, i, j in zip(["a", "b", "c"][:4], [4] * 3, [8] * 3)]
-    >>> column_idx = {k: v for v, k in enumerate(["a", "b", "c"])}
-    >>> wide = Wide(10, 1)
-    >>> deep = TabMlp(mlp_hidden_dims=[8, 4], column_idx=column_idx, embed_input=embed_input)
-    >>> model = WideDeep(wide, deep)
-    >>>
-    >>> def training_function(config, X_train, X_val):
-    >>>    batch_size = config["batch_size"]
-    >>>    trainer = Trainer(model, objective="regression", callbacks=[RayTuneReporter])
-    >>>    trainer.fit(X_train=X_train,
-    >>>                X_val=X_val,
-    >>>                n_epochs=5,
-    >>>                batch_size=batch_size)
-    >>> X_train = {"X_wide": X_wide_train, "X_tab": X_tab_train, "target": y_train}
-    >>> X_val = {"X_wide": X_wide_valid, "X_tab": X_tab_valid, "target": y_valid}
-    >>> analysis = tune.run(tune.with_parameters(training_function, X_train=X_train, X_val=X_val),
-    >>>                     config=config)
    """
+
    def on_epoch_end(
        self, epoch: int, logs: Optional[Dict] = None, metric: Optional[float] = None
    ):
        report_dict = {}
        for k, v in self.trainer.history.items():
            report_dict.update({k: v[-1]})
-        if hasattr(self.trainer, 'lr_history'):
+        if hasattr(self.trainer, "lr_history"):
            for k, v in self.trainer.lr_history.items():
                report_dict.update({k: v[-1]})
-        tune.report(report_dict)
\ No newline at end of file
+        tune.report(report_dict)
--- a/pytorch_widedeep/tab2vec.py
+++ b/pytorch_widedeep/tab2vec.py
@@ -126,8 +126,12 @@ class Tab2Vec:
        """
        return self

-    def transform(self, df: pd.DataFrame, new_embed_col_list: bool = False,
-        target_col: Optional[str] = None) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.Series]]:
+    def transform(
+        self,
+        df: pd.DataFrame,
+        new_embed_col_list: bool = False,
+        target_col: Optional[str] = None,
+    ) -> Union[pd.DataFrame, Tuple[pd.DataFrame, pd.Series]]:
        r"""
        Parameters
        ----------
@@ -160,22 +164,36 @@ class Tab2Vec:

        col_names = list(self.tab_preprocessor.column_idx.keys())
        embed_col_names = []
-        for col, vec_size in tab_preprocessor.embed_cols:
-            embed_col_names_temp = [col+'_'+str(i) for i in range(vec_size)]
-            embed_col_names.extend(embed_col_names_temp)
-            col_names = list(chain.from_iterable(embed_col_names_temp if item == col
-                                                 else [item] for item in col_names))
-
-        if target_col:
-            if new_embed_col_list:
-                return pd.DataFrame(data=X_vec, columns=col_names), df[target_col], embed_col_names
+        if self.tab_preprocessor.for_transformer:
+            if target_col:
+                return pd.DataFrame(data=X_vec), df[target_col]
            else:
-                return pd.DataFrame(data=X_vec, columns=col_names), df[target_col]
+                return pd.DataFrame(data=X_vec)
        else:
-            if new_embed_col_list:
-                return pd.DataFrame(data=X_vec, columns=col_names), embed_col_names
+            for col, vec_size in self.tab_preprocessor.embed_cols:
+                embed_col_names_temp = [col + "_" + str(i) for i in range(vec_size)]
+                embed_col_names.extend(embed_col_names_temp)
+                col_names = list(
+                    chain.from_iterable(
+                        embed_col_names_temp if item == col else [item]
+                        for item in col_names
+                    )
+                )
+
+            if target_col:
+                if new_embed_col_list:
+                    return (
+                        pd.DataFrame(data=X_vec, columns=col_names),
+                        df[target_col],
+                        embed_col_names,
+                    )
+                else:
+                    return pd.DataFrame(data=X_vec, columns=col_names), df[target_col]
            else:
-                return pd.DataFrame(data=X_vec, columns=col_names)
+                if new_embed_col_list:
+                    return pd.DataFrame(data=X_vec, columns=col_names), embed_col_names
+                else:
+                    return pd.DataFrame(data=X_vec, columns=col_names)

    def fit_transform(
        self, df: pd.DataFrame, target_col: Optional[str] = None

--- a/pytorch_widedeep/training/trainer.py
+++ b/pytorch_widedeep/training/trainer.py
@@ -1242,7 +1242,7 @@ class Trainer:
        else:
            self.metric = None
        sorted_callbacks_list = deepcopy(self.callbacks)
-        for obj,i in zip(self.callbacks[::-1], range(len(self.callbacks))[::-1]):
+        for obj, i in zip(self.callbacks[::-1], range(len(self.callbacks))[::-1]):
            if isinstance(obj, RayTuneReporter):
                sorted_callbacks_list.append(sorted_callbacks_list.pop(i))
        self.callbacks = sorted_callbacks_list

--- a/setup.py
+++ b/setup.py
@@ -65,7 +65,7 @@ setup_kwargs = {
        "einops",
        "wrapt",
        "torchmetrics",
-        "ray",
+        "ray[tune]",
    ],
    "extras_require": extras,
    "python_requires": ">=3.7.0",