diff --git a/VERSION b/VERSION index 9256e2880f39736dac1d01176689dd74e1f03a56..97bceaaf6814ef96f3a00b04fba408b769949cec 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.0.13 \ No newline at end of file +1.0.14 \ No newline at end of file diff --git a/examples/15_AUC_multiclass.ipynb b/examples/15_AUC_multiclass.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..87a8d590760757c292e3d364a3e70e87e1b1c4a8 --- /dev/null +++ b/examples/15_AUC_multiclass.ipynb @@ -0,0 +1,447 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "731975e2", + "metadata": {}, + "source": [ + "# AUC multiclass computation" + ] + }, + { + "cell_type": "markdown", + "id": "ee745c58", + "metadata": {}, + "source": [ + "## Initial imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "fdab94eb", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2021-12-11 21:51:29.255591: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory\n", + "2021-12-11 21:51:29.255638: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "from torch.optim import SGD, lr_scheduler\n", + "\n", + "from pytorch_widedeep import Trainer\n", + "from pytorch_widedeep.preprocessing import TabPreprocessor\n", + "from pytorch_widedeep.models import TabMlp, WideDeep\n", + "from torchmetrics import AUC, AUROC\n", + "from pytorch_widedeep.initializers import XavierNormal\n", + "from pytorch_widedeep.datasets import load_ecoli\n", + "from pytorch_widedeep.utils import LabelEncoder\n", + "\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "# increase displayed columns in jupyter notebook\n", + 
"pd.set_option(\"display.max_columns\", 200)\n", + "pd.set_option(\"display.max_rows\", 300)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "07c75f0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SequenceNamemcggvhlipchgaacalm1alm2class
0AAT_ECOLI0.490.290.480.50.560.240.35cp
1ACEA_ECOLI0.070.400.480.50.540.350.44cp
2ACEK_ECOLI0.560.400.480.50.490.370.46cp
3ACKA_ECOLI0.590.490.480.50.520.450.36cp
4ADI_ECOLI0.230.320.480.50.550.250.35cp
\n", + "
" + ], + "text/plain": [ + " SequenceName mcg gvh lip chg aac alm1 alm2 class\n", + "0 AAT_ECOLI 0.49 0.29 0.48 0.5 0.56 0.24 0.35 cp\n", + "1 ACEA_ECOLI 0.07 0.40 0.48 0.5 0.54 0.35 0.44 cp\n", + "2 ACEK_ECOLI 0.56 0.40 0.48 0.5 0.49 0.37 0.46 cp\n", + "3 ACKA_ECOLI 0.59 0.49 0.48 0.5 0.52 0.45 0.36 cp\n", + "4 ADI_ECOLI 0.23 0.32 0.48 0.5 0.55 0.25 0.35 cp" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = load_ecoli(as_frame=True)\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1e3f8efc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "cp 143\n", + "im 77\n", + "pp 52\n", + "imU 35\n", + "om 20\n", + "omL 5\n", + "imS 2\n", + "imL 2\n", + "Name: class, dtype: int64" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# imbalance of the classes\n", + "df[\"class\"].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "e4db0d6d", + "metadata": {}, + "outputs": [], + "source": [ + "df = df.loc[~df[\"class\"].isin([\"omL\", \"imS\", \"imL\"])]\n", + "df.reset_index(inplace=True, drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "005531a3", + "metadata": {}, + "outputs": [], + "source": [ + "encoder = LabelEncoder([\"class\"])\n", + "df_enc = encoder.fit_transform(df)\n", + "df_enc[\"class\"] = df_enc[\"class\"]-1" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "214b3071", + "metadata": {}, + "outputs": [], + "source": [ + "# drop columns we won't need in this example\n", + "df_enc = df_enc.drop(columns=[\"SequenceName\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "168c81f1", + "metadata": {}, + "outputs": [], + "source": [ + "df_train, df_valid = train_test_split(df_enc, test_size=0.2, stratify=df_enc[\"class\"], random_state=1)\n", + "df_valid, df_test = 
train_test_split(df_valid, test_size=0.5, stratify=df_valid[\"class\"], random_state=1)" + ] + }, + { + "cell_type": "markdown", + "id": "87e7b8f0", + "metadata": {}, + "source": [ + "## Preparing the data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "3a7b246b", + "metadata": {}, + "outputs": [], + "source": [ + "continuous_cols = df_enc.drop(columns=[\"class\"]).columns.values.tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "7a2dac24", + "metadata": {}, + "outputs": [], + "source": [ + "# deeptabular\n", + "tab_preprocessor = TabPreprocessor(continuous_cols=continuous_cols, scale=True)\n", + "X_tab_train = tab_preprocessor.fit_transform(df_train)\n", + "X_tab_valid = tab_preprocessor.transform(df_valid)\n", + "X_tab_test = tab_preprocessor.transform(df_test)\n", + "\n", + "# target\n", + "y_train = df_train[\"class\"].values\n", + "y_valid = df_valid[\"class\"].values\n", + "y_test = df_test[\"class\"].values\n", + "\n", + "X_train = {\"X_tab\": X_tab_train, \"target\": y_train}\n", + "X_val = {\"X_tab\": X_tab_valid, \"target\": y_valid}" + ] + }, + { + "cell_type": "markdown", + "id": "7b9f63e2", + "metadata": {}, + "source": [ + "## Define the model" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "511198d4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "WideDeep(\n", + " (deeptabular): Sequential(\n", + " (0): TabMlp(\n", + " (cat_embed_and_cont): CatEmbeddingsAndCont(\n", + " (cont_norm): BatchNorm1d(7, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " )\n", + " (tab_mlp): MLP(\n", + " (mlp): Sequential(\n", + " (dense_layer_0): Sequential(\n", + " (0): Dropout(p=0.1, inplace=False)\n", + " (1): Linear(in_features=7, out_features=200, bias=True)\n", + " (2): ReLU(inplace=True)\n", + " )\n", + " (dense_layer_1): Sequential(\n", + " (0): Dropout(p=0.1, inplace=False)\n", + " (1): Linear(in_features=200, out_features=100, bias=True)\n", + " 
(2): ReLU(inplace=True)\n", + " )\n", + " )\n", + " )\n", + " )\n", + " (1): Linear(in_features=100, out_features=5, bias=True)\n", + " )\n", + ")" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "deeptabular = TabMlp(\n", + " column_idx=tab_preprocessor.column_idx,\n", + " continuous_cols=tab_preprocessor.continuous_cols,\n", + ")\n", + "model = WideDeep(deeptabular=deeptabular, pred_dim=df_enc[\"class\"].nunique())\n", + "model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a5359b0f", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/palo/miniconda3/lib/python3.8/site-packages/torchmetrics/utilities/prints.py:36: UserWarning: Metric `AUROC` will save all targets and predictions in buffer. For large datasets this may lead to large memory footprint.\n", + " warnings.warn(*args, **kwargs)\n" + ] + } + ], + "source": [ + "auroc = AUROC(num_classes=df_enc[\"class\"].nunique())" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "34a18ac0", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "epoch 1: 100%|██████████| 6/6 [00:00<00:00, 84.27it/s, loss=0.111, metrics={'AUROC': 0.285}]\n", + "valid: 100%|██████████| 1/1 [00:00<00:00, 5.57it/s, loss=0.106, metrics={'AUROC': 0.3309}]\n", + "epoch 2: 100%|██████████| 6/6 [00:00<00:00, 111.92it/s, loss=0.106, metrics={'AUROC': 0.3124}]\n", + "valid: 100%|██████████| 1/1 [00:00<00:00, 4.99it/s, loss=0.102, metrics={'AUROC': 0.375}]\n", + "epoch 3: 100%|██████████| 6/6 [00:00<00:00, 109.51it/s, loss=0.102, metrics={'AUROC': 0.3459}]\n", + "valid: 100%|██████████| 1/1 [00:00<00:00, 6.70it/s, loss=0.0967, metrics={'AUROC': 0.4444}]\n", + "epoch 4: 100%|██████████| 6/6 [00:00<00:00, 106.40it/s, loss=0.0984, metrics={'AUROC': 0.3717}]\n", + "valid: 100%|██████████| 1/1 [00:00<00:00, 5.93it/s, loss=0.0963, 
metrics={'AUROC': 0.4516}]\n", + "epoch 5: 100%|██████████| 6/6 [00:00<00:00, 93.06it/s, loss=0.0975, metrics={'AUROC': 0.3877}]\n", + "valid: 100%|██████████| 1/1 [00:00<00:00, 5.98it/s, loss=0.0961, metrics={'AUROC': 0.4404}]\n" + ] + } + ], + "source": [ + "# Optimizers\n", + "deep_opt = SGD(model.deeptabular.parameters(), lr=0.1)\n", + "# LR Scheduler\n", + "deep_sch = lr_scheduler.StepLR(deep_opt, step_size=3)\n", + "# Hyperparameters\n", + "trainer = Trainer(\n", + " model,\n", + " objective=\"multiclass_focal_loss\",\n", + " lr_schedulers={\"deeptabular\": deep_sch},\n", + " initializers={\"deeptabular\": XavierNormal},\n", + " optimizers={\"deeptabular\": deep_opt},\n", + " metrics=[auroc],\n", + ")\n", + "\n", + "trainer.fit(X_train=X_train, X_val=X_val, n_epochs=5, batch_size=50)" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "3b99005fd577fa40f3cce433b2b92303885900e634b2b5344c07c59d06c8792d" + }, + "kernelspec": { + "display_name": "Python 3.8.5 64-bit ('base': conda)", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pytorch_widedeep/datasets/__init__.py b/pytorch_widedeep/datasets/__init__.py index b4ba41cbc4028a92610058ac32b9d3bf3a94ba5f..00eca1e588d16c9007f29dc4e21f87223b3c9239 100644 --- a/pytorch_widedeep/datasets/__init__.py +++ b/pytorch_widedeep/datasets/__init__.py @@ -1,3 +1,3 @@ -from ._base import load_adult, load_bio_kdd04 +from ._base import 
load_adult, load_bio_kdd04, load_ecoli -__all__ = ["load_bio_kdd04", "load_adult"] +__all__ = ["load_bio_kdd04", "load_adult", "load_ecoli"] diff --git a/pytorch_widedeep/datasets/_base.py b/pytorch_widedeep/datasets/_base.py index 45e3e76a488b44069c16c5a1042dc444506ec984..e6ea8cc708ac4700291efca2d784a99849b848dc 100644 --- a/pytorch_widedeep/datasets/_base.py +++ b/pytorch_widedeep/datasets/_base.py @@ -5,7 +5,7 @@ import pandas as pd def load_bio_kdd04(as_frame: bool = False): """Load and return the higly imbalanced Protein Homology - Dataset from [KDD cup 2004](https://www.kdd.org/kdd-cup/view/kdd-cup-2004/Data. + Dataset from [KDD cup 2004](https://www.kdd.org/kdd-cup/view/kdd-cup-2004/Data). This datasets include only bio_train.dat part of the dataset @@ -46,3 +46,88 @@ def load_adult(as_frame: bool = False): return df else: return df.to_numpy() + + +def load_ecoli(as_frame: bool = False): + """Load and return the highly imbalanced multiclass classification e.coli dataset + Dataset from [UCI Machine Learning Repository](https://archive.ics.uci.edu/ml/datasets/ecoli). + + + 1. Title: Protein Localization Sites + + 2. Creator and Maintainer: + Kenta Nakai + Institute of Molecular and Cellular Biology + Osaka University + 1-3 Yamada-oka, Suita 565 Japan + nakai@imcb.osaka-u.ac.jp + http://www.imcb.osaka-u.ac.jp/nakai/psort.html + Donor: Paul Horton (paulh@cs.berkeley.edu) + Date: September, 1996 + See also: yeast database + + 3. Past Usage. + Reference: "A Probabilistic Classification System for Predicting the Cellular + Localization Sites of Proteins", Paul Horton & Kenta Nakai, + Intelligent Systems in Molecular Biology, 109-115. + St. Louis, USA 1996. + Results: 81% for E.coli with an ad hoc structured + probability model. Also similar accuracy for Binary Decision Tree and + Bayesian Classifier methods applied by the same authors in + unpublished results. + + Predicted Attribute: Localization site of protein. ( non-numeric ). + + 4. 
The references below describe a predecessor to this dataset and its + development. They also give results (not cross-validated) for classification + by a rule-based expert system with that version of the dataset. + + Reference: "Expert System for Predicting Protein Localization Sites in + Gram-Negative Bacteria", Kenta Nakai & Minoru Kanehisa, + PROTEINS: Structure, Function, and Genetics 11:95-110, 1991. + + Reference: "A Knowledge Base for Predicting Protein Localization Sites in + Eukaryotic Cells", Kenta Nakai & Minoru Kanehisa, + Genomics 14:897-911, 1992. + + 5. Number of Instances: 336 for the E.coli dataset + + 6. Number of Attributes. + for E.coli dataset: 8 ( 7 predictive, 1 name ) + + 7. Attribute Information. + + 1. Sequence Name: Accession number for the SWISS-PROT database + 2. mcg: McGeoch's method for signal sequence recognition. + 3. gvh: von Heijne's method for signal sequence recognition. + 4. lip: von Heijne's Signal Peptidase II consensus sequence score. + Binary attribute. + 5. chg: Presence of charge on N-terminus of predicted lipoproteins. + Binary attribute. + 6. aac: score of discriminant analysis of the amino acid content of + outer membrane and periplasmic proteins. + 7. alm1: score of the ALOM membrane spanning region prediction program. + 8. alm2: score of ALOM program after excluding putative cleavable signal + regions from the sequence. + + 8. Missing Attribute Values: None. + + 9. Class Distribution. The class is the localization site. Please see Nakai & Kanehisa referenced above for more details. 
+ + cp (cytoplasm) 143 + im (inner membrane without signal sequence) 77 + pp (periplasm) 52 + imU (inner membrane, uncleavable signal sequence) 35 + om (outer membrane) 20 + omL (outer membrane lipoprotein) 5 + imL (inner membrane lipoprotein) 2 + imS (inner membrane, cleavable signal sequence) 2 + """ + + with resources.path("pytorch_widedeep.datasets.data", "ecoli.csv") as fpath: + df = pd.read_csv(fpath, sep=",") + + if as_frame: + return df + else: + return df.to_numpy() diff --git a/pytorch_widedeep/datasets/data/ecoli.csv b/pytorch_widedeep/datasets/data/ecoli.csv new file mode 100644 index 0000000000000000000000000000000000000000..4b91813be0598b5fd75c747bd94b4facb7eb28f7 --- /dev/null +++ b/pytorch_widedeep/datasets/data/ecoli.csv @@ -0,0 +1,337 @@ +SequenceName,mcg,gvh,lip,chg,aac,alm1,alm2,class +AAT_ECOLI,0.49,0.29,0.48,0.50,0.56,0.24,0.35,cp +ACEA_ECOLI,0.07,0.40,0.48,0.50,0.54,0.35,0.44,cp +ACEK_ECOLI,0.56,0.40,0.48,0.50,0.49,0.37,0.46,cp +ACKA_ECOLI,0.59,0.49,0.48,0.50,0.52,0.45,0.36,cp +ADI_ECOLI,0.23,0.32,0.48,0.50,0.55,0.25,0.35,cp +ALKH_ECOLI,0.67,0.39,0.48,0.50,0.36,0.38,0.46,cp +AMPD_ECOLI,0.29,0.28,0.48,0.50,0.44,0.23,0.34,cp +AMY2_ECOLI,0.21,0.34,0.48,0.50,0.51,0.28,0.39,cp +APT_ECOLI,0.20,0.44,0.48,0.50,0.46,0.51,0.57,cp +ARAC_ECOLI,0.42,0.40,0.48,0.50,0.56,0.18,0.30,cp +ASG1_ECOLI,0.42,0.24,0.48,0.50,0.57,0.27,0.37,cp +BTUR_ECOLI,0.25,0.48,0.48,0.50,0.44,0.17,0.29,cp +CAFA_ECOLI,0.39,0.32,0.48,0.50,0.46,0.24,0.35,cp +CAIB_ECOLI,0.51,0.50,0.48,0.50,0.46,0.32,0.35,cp +CFA_ECOLI,0.22,0.43,0.48,0.50,0.48,0.16,0.28,cp +CHEA_ECOLI,0.25,0.40,0.48,0.50,0.46,0.44,0.52,cp +CHEB_ECOLI,0.34,0.45,0.48,0.50,0.38,0.24,0.35,cp +CHEW_ECOLI,0.44,0.27,0.48,0.50,0.55,0.52,0.58,cp +CHEY_ECOLI,0.23,0.40,0.48,0.50,0.39,0.28,0.38,cp +CHEZ_ECOLI,0.41,0.57,0.48,0.50,0.39,0.21,0.32,cp +CRL_ECOLI,0.40,0.45,0.48,0.50,0.38,0.22,0.00,cp +CSPA_ECOLI,0.31,0.23,0.48,0.50,0.73,0.05,0.14,cp +CYNR_ECOLI,0.51,0.54,0.48,0.50,0.41,0.34,0.43,cp 
+CYPB_ECOLI,0.30,0.16,0.48,0.50,0.56,0.11,0.23,cp +CYPC_ECOLI,0.36,0.39,0.48,0.50,0.48,0.22,0.23,cp +CYSB_ECOLI,0.29,0.37,0.48,0.50,0.48,0.44,0.52,cp +CYSE_ECOLI,0.25,0.40,0.48,0.50,0.47,0.33,0.42,cp +DAPD_ECOLI,0.21,0.51,0.48,0.50,0.50,0.32,0.41,cp +DCP_ECOLI,0.43,0.37,0.48,0.50,0.53,0.35,0.44,cp +DDLA_ECOLI,0.43,0.39,0.48,0.50,0.47,0.31,0.41,cp +DDLB_ECOLI,0.53,0.38,0.48,0.50,0.44,0.26,0.36,cp +DEOC_ECOLI,0.34,0.33,0.48,0.50,0.38,0.35,0.44,cp +DLDH_ECOLI,0.56,0.51,0.48,0.50,0.34,0.37,0.46,cp +EFG_ECOLI,0.40,0.29,0.48,0.50,0.42,0.35,0.44,cp +EFTS_ECOLI,0.24,0.35,0.48,0.50,0.31,0.19,0.31,cp +EFTU_ECOLI,0.36,0.54,0.48,0.50,0.41,0.38,0.46,cp +ENO_ECOLI,0.29,0.52,0.48,0.50,0.42,0.29,0.39,cp +FABB_ECOLI,0.65,0.47,0.48,0.50,0.59,0.30,0.40,cp +FES_ECOLI,0.32,0.42,0.48,0.50,0.35,0.28,0.38,cp +G3P1_ECOLI,0.38,0.46,0.48,0.50,0.48,0.22,0.29,cp +G3P2_ECOLI,0.33,0.45,0.48,0.50,0.52,0.32,0.41,cp +G6PI_ECOLI,0.30,0.37,0.48,0.50,0.59,0.41,0.49,cp +GCVA_ECOLI,0.40,0.50,0.48,0.50,0.45,0.39,0.47,cp +GLNA_ECOLI,0.28,0.38,0.48,0.50,0.50,0.33,0.42,cp +GLPD_ECOLI,0.61,0.45,0.48,0.50,0.48,0.35,0.41,cp +GLYA_ECOLI,0.17,0.38,0.48,0.50,0.45,0.42,0.50,cp +GSHR_ECOLI,0.44,0.35,0.48,0.50,0.55,0.55,0.61,cp +GT_ECOLI,0.43,0.40,0.48,0.50,0.39,0.28,0.39,cp +HEM6_ECOLI,0.42,0.35,0.48,0.50,0.58,0.15,0.27,cp +HEMN_ECOLI,0.23,0.33,0.48,0.50,0.43,0.33,0.43,cp +HPRT_ECOLI,0.37,0.52,0.48,0.50,0.42,0.42,0.36,cp +IF1_ECOLI,0.29,0.30,0.48,0.50,0.45,0.03,0.17,cp +IF2_ECOLI,0.22,0.36,0.48,0.50,0.35,0.39,0.47,cp +ILVY_ECOLI,0.23,0.58,0.48,0.50,0.37,0.53,0.59,cp +IPYR_ECOLI,0.47,0.47,0.48,0.50,0.22,0.16,0.26,cp +KAD_ECOLI,0.54,0.47,0.48,0.50,0.28,0.33,0.42,cp +KDSA_ECOLI,0.51,0.37,0.48,0.50,0.35,0.36,0.45,cp +LEU3_ECOLI,0.40,0.35,0.48,0.50,0.45,0.33,0.42,cp +LON_ECOLI,0.44,0.34,0.48,0.50,0.30,0.33,0.43,cp +LPLA_ECOLI,0.42,0.38,0.48,0.50,0.54,0.34,0.43,cp +LYSR_ECOLI,0.44,0.56,0.48,0.50,0.50,0.46,0.54,cp +MALQ_ECOLI,0.52,0.36,0.48,0.50,0.41,0.28,0.38,cp +MALZ_ECOLI,0.36,0.41,0.48,0.50,0.48,0.47,0.54,cp 
+MASY_ECOLI,0.18,0.30,0.48,0.50,0.46,0.24,0.35,cp +METB_ECOLI,0.47,0.29,0.48,0.50,0.51,0.33,0.43,cp +METC_ECOLI,0.24,0.43,0.48,0.50,0.54,0.52,0.59,cp +METK_ECOLI,0.25,0.37,0.48,0.50,0.41,0.33,0.42,cp +METR_ECOLI,0.52,0.57,0.48,0.50,0.42,0.47,0.54,cp +METX_ECOLI,0.25,0.37,0.48,0.50,0.43,0.26,0.36,cp +MURF_ECOLI,0.35,0.48,0.48,0.50,0.56,0.40,0.48,cp +NADA_ECOLI,0.26,0.26,0.48,0.50,0.34,0.25,0.35,cp +NFRC_ECOLI,0.44,0.51,0.48,0.50,0.47,0.26,0.36,cp +NHAR_ECOLI,0.37,0.50,0.48,0.50,0.42,0.36,0.45,cp +NIRD_ECOLI,0.44,0.42,0.48,0.50,0.42,0.25,0.20,cp +OMPR_ECOLI,0.24,0.43,0.48,0.50,0.37,0.28,0.38,cp +OTC1_ECOLI,0.42,0.30,0.48,0.50,0.48,0.26,0.36,cp +OTC2_ECOLI,0.48,0.42,0.48,0.50,0.45,0.25,0.35,cp +PEPE_ECOLI,0.41,0.48,0.48,0.50,0.51,0.44,0.51,cp +PFLA_ECOLI,0.44,0.28,0.48,0.50,0.43,0.27,0.37,cp +PFLB_ECOLI,0.29,0.41,0.48,0.50,0.48,0.38,0.46,cp +PGK_ECOLI,0.34,0.28,0.48,0.50,0.41,0.35,0.44,cp +PHOB_ECOLI,0.41,0.43,0.48,0.50,0.45,0.31,0.41,cp +PHOH_ECOLI,0.29,0.47,0.48,0.50,0.41,0.23,0.34,cp +PMBA_ECOLI,0.34,0.55,0.48,0.50,0.58,0.31,0.41,cp +PNP_ECOLI,0.36,0.56,0.48,0.50,0.43,0.45,0.53,cp +PROB_ECOLI,0.40,0.46,0.48,0.50,0.52,0.49,0.56,cp +PT1A_ECOLI,0.50,0.49,0.48,0.50,0.49,0.46,0.53,cp +PT1_ECOLI,0.52,0.44,0.48,0.50,0.37,0.36,0.42,cp +PTCA_ECOLI,0.50,0.51,0.48,0.50,0.27,0.23,0.34,cp +PTCB_ECOLI,0.53,0.42,0.48,0.50,0.16,0.29,0.39,cp +PTFA_ECOLI,0.34,0.46,0.48,0.50,0.52,0.35,0.44,cp +PTGA_ECOLI,0.40,0.42,0.48,0.50,0.37,0.27,0.27,cp +PTHA_ECOLI,0.41,0.43,0.48,0.50,0.50,0.24,0.25,cp +PTHP_ECOLI,0.30,0.45,0.48,0.50,0.36,0.21,0.32,cp +PTKA_ECOLI,0.31,0.47,0.48,0.50,0.29,0.28,0.39,cp +PTKB_ECOLI,0.64,0.76,0.48,0.50,0.45,0.35,0.38,cp +PTNA_ECOLI,0.35,0.37,0.48,0.50,0.30,0.34,0.43,cp +PTWB_ECOLI,0.57,0.54,0.48,0.50,0.37,0.28,0.33,cp +PTWX_ECOLI,0.65,0.55,0.48,0.50,0.34,0.37,0.28,cp +RHAR_ECOLI,0.51,0.46,0.48,0.50,0.58,0.31,0.41,cp +RHAS_ECOLI,0.38,0.40,0.48,0.50,0.63,0.25,0.35,cp +RIMI_ECOLI,0.24,0.57,0.48,0.50,0.63,0.34,0.43,cp +RIMJ_ECOLI,0.38,0.26,0.48,0.50,0.54,0.16,0.28,cp 
+RIML_ECOLI,0.33,0.47,0.48,0.50,0.53,0.18,0.29,cp +RNB_ECOLI,0.24,0.34,0.48,0.50,0.38,0.30,0.40,cp +RNC_ECOLI,0.26,0.50,0.48,0.50,0.44,0.32,0.41,cp +RND_ECOLI,0.44,0.49,0.48,0.50,0.39,0.38,0.40,cp +RNE_ECOLI,0.43,0.32,0.48,0.50,0.33,0.45,0.52,cp +SERC_ECOLI,0.49,0.43,0.48,0.50,0.49,0.30,0.40,cp +SLYD_ECOLI,0.47,0.28,0.48,0.50,0.56,0.20,0.25,cp +SOXS_ECOLI,0.32,0.33,0.48,0.50,0.60,0.06,0.20,cp +SYA_ECOLI,0.34,0.35,0.48,0.50,0.51,0.49,0.56,cp +SYC_ECOLI,0.35,0.34,0.48,0.50,0.46,0.30,0.27,cp +SYD_ECOLI,0.38,0.30,0.48,0.50,0.43,0.29,0.39,cp +SYE_ECOLI,0.38,0.44,0.48,0.50,0.43,0.20,0.31,cp +SYFA_ECOLI,0.41,0.51,0.48,0.50,0.58,0.20,0.31,cp +SYFB_ECOLI,0.34,0.42,0.48,0.50,0.41,0.34,0.43,cp +SYGA_ECOLI,0.51,0.49,0.48,0.50,0.53,0.14,0.26,cp +SYGB_ECOLI,0.25,0.51,0.48,0.50,0.37,0.42,0.50,cp +SYH_ECOLI,0.29,0.28,0.48,0.50,0.50,0.42,0.50,cp +SYI_ECOLI,0.25,0.26,0.48,0.50,0.39,0.32,0.42,cp +SYK1_ECOLI,0.24,0.41,0.48,0.50,0.49,0.23,0.34,cp +SYK2_ECOLI,0.17,0.39,0.48,0.50,0.53,0.30,0.39,cp +SYL_ECOLI,0.04,0.31,0.48,0.50,0.41,0.29,0.39,cp +SYM_ECOLI,0.61,0.36,0.48,0.50,0.49,0.35,0.44,cp +SYP_ECOLI,0.34,0.51,0.48,0.50,0.44,0.37,0.46,cp +SYQ_ECOLI,0.28,0.33,0.48,0.50,0.45,0.22,0.33,cp +SYR_ECOLI,0.40,0.46,0.48,0.50,0.42,0.35,0.44,cp +SYS_ECOLI,0.23,0.34,0.48,0.50,0.43,0.26,0.37,cp +SYT_ECOLI,0.37,0.44,0.48,0.50,0.42,0.39,0.47,cp +SYV_ECOLI,0.00,0.38,0.48,0.50,0.42,0.48,0.55,cp +SYW_ECOLI,0.39,0.31,0.48,0.50,0.38,0.34,0.43,cp +SYY_ECOLI,0.30,0.44,0.48,0.50,0.49,0.22,0.33,cp +THGA_ECOLI,0.27,0.30,0.48,0.50,0.71,0.28,0.39,cp +THIK_ECOLI,0.17,0.52,0.48,0.50,0.49,0.37,0.46,cp +TYRB_ECOLI,0.36,0.42,0.48,0.50,0.53,0.32,0.41,cp +UBIC_ECOLI,0.30,0.37,0.48,0.50,0.43,0.18,0.30,cp +UGPQ_ECOLI,0.26,0.40,0.48,0.50,0.36,0.26,0.37,cp +USPA_ECOLI,0.40,0.41,0.48,0.50,0.55,0.22,0.33,cp +UVRB_ECOLI,0.22,0.34,0.48,0.50,0.42,0.29,0.39,cp +UVRC_ECOLI,0.44,0.35,0.48,0.50,0.44,0.52,0.59,cp +XGPT_ECOLI,0.27,0.42,0.48,0.50,0.37,0.38,0.43,cp +XYLA_ECOLI,0.16,0.43,0.48,0.50,0.54,0.27,0.37,cp 
+EMRA_ECOLI,0.06,0.61,0.48,0.50,0.49,0.92,0.37,im +AAS_ECOLI,0.44,0.52,0.48,0.50,0.43,0.47,0.54,im +AMPE_ECOLI,0.63,0.47,0.48,0.50,0.51,0.82,0.84,im +ARAE_ECOLI,0.23,0.48,0.48,0.50,0.59,0.88,0.89,im +ARAH_ECOLI,0.34,0.49,0.48,0.50,0.58,0.85,0.80,im +AROP_ECOLI,0.43,0.40,0.48,0.50,0.58,0.75,0.78,im +ATKB_ECOLI,0.46,0.61,0.48,0.50,0.48,0.86,0.87,im +ATP6_ECOLI,0.27,0.35,0.48,0.50,0.51,0.77,0.79,im +BETT_ECOLI,0.52,0.39,0.48,0.50,0.65,0.71,0.73,im +CODB_ECOLI,0.29,0.47,0.48,0.50,0.71,0.65,0.69,im +CYDA_ECOLI,0.55,0.47,0.48,0.50,0.57,0.78,0.80,im +CYOC_ECOLI,0.12,0.67,0.48,0.50,0.74,0.58,0.63,im +CYOD_ECOLI,0.40,0.50,0.48,0.50,0.65,0.82,0.84,im +DCTA_ECOLI,0.73,0.36,0.48,0.50,0.53,0.91,0.92,im +DHG_ECOLI,0.84,0.44,0.48,0.50,0.48,0.71,0.74,im +DHSC_ECOLI,0.48,0.45,0.48,0.50,0.60,0.78,0.80,im +DHSD_ECOLI,0.54,0.49,0.48,0.50,0.40,0.87,0.88,im +DPPC_ECOLI,0.48,0.41,0.48,0.50,0.51,0.90,0.88,im +DSBB_ECOLI,0.50,0.66,0.48,0.50,0.31,0.92,0.92,im +ENVZ_ECOLI,0.72,0.46,0.48,0.50,0.51,0.66,0.70,im +EXBB_ECOLI,0.47,0.55,0.48,0.50,0.58,0.71,0.75,im +FRDC_ECOLI,0.33,0.56,0.48,0.50,0.33,0.78,0.80,im +FRDD_ECOLI,0.64,0.58,0.48,0.50,0.48,0.78,0.73,im +FTSW_ECOLI,0.54,0.57,0.48,0.50,0.56,0.81,0.83,im +GABP_ECOLI,0.47,0.59,0.48,0.50,0.52,0.76,0.79,im +GALP_ECOLI,0.63,0.50,0.48,0.50,0.59,0.85,0.86,im +GLNP_ECOLI,0.49,0.42,0.48,0.50,0.53,0.79,0.81,im +GLPT_ECOLI,0.31,0.50,0.48,0.50,0.57,0.84,0.85,im +GLTP_ECOLI,0.74,0.44,0.48,0.50,0.55,0.88,0.89,im +KDGL_ECOLI,0.33,0.45,0.48,0.50,0.45,0.88,0.89,im +KGTP_ECOLI,0.45,0.40,0.48,0.50,0.61,0.74,0.77,im +LACY_ECOLI,0.71,0.40,0.48,0.50,0.71,0.70,0.74,im +LGT_ECOLI,0.50,0.37,0.48,0.50,0.66,0.64,0.69,im +LLDP_ECOLI,0.66,0.53,0.48,0.50,0.59,0.66,0.66,im +LNT_ECOLI,0.60,0.61,0.48,0.50,0.54,0.67,0.71,im +LSPA_ECOLI,0.83,0.37,0.48,0.50,0.61,0.71,0.74,im +LYSP_ECOLI,0.34,0.51,0.48,0.50,0.67,0.90,0.90,im +MALF_ECOLI,0.63,0.54,0.48,0.50,0.65,0.79,0.81,im +MALG_ECOLI,0.70,0.40,0.48,0.50,0.56,0.86,0.83,im +MCP3_ECOLI,0.60,0.50,1.00,0.50,0.54,0.77,0.80,im 
+MSBB_ECOLI,0.16,0.51,0.48,0.50,0.33,0.39,0.48,im +MTR_ECOLI,0.74,0.70,0.48,0.50,0.66,0.65,0.69,im +NANT_ECOLI,0.20,0.46,0.48,0.50,0.57,0.78,0.81,im +NHAA_ECOLI,0.89,0.55,0.48,0.50,0.51,0.72,0.76,im +NHAB_ECOLI,0.70,0.46,0.48,0.50,0.56,0.78,0.73,im +PHEP_ECOLI,0.12,0.43,0.48,0.50,0.63,0.70,0.74,im +PHOR_ECOLI,0.61,0.52,0.48,0.50,0.54,0.67,0.52,im +PNTA_ECOLI,0.33,0.37,0.48,0.50,0.46,0.65,0.69,im +POTE_ECOLI,0.63,0.65,0.48,0.50,0.66,0.67,0.71,im +PROP_ECOLI,0.41,0.51,0.48,0.50,0.53,0.75,0.78,im +PSTA_ECOLI,0.34,0.67,0.48,0.50,0.52,0.76,0.79,im +PSTC_ECOLI,0.58,0.34,0.48,0.50,0.56,0.87,0.81,im +PTAA_ECOLI,0.59,0.56,0.48,0.50,0.55,0.80,0.82,im +PTBA_ECOLI,0.51,0.40,0.48,0.50,0.57,0.62,0.67,im +PTCC_ECOLI,0.50,0.57,0.48,0.50,0.71,0.61,0.66,im +PTDA_ECOLI,0.60,0.46,0.48,0.50,0.45,0.81,0.83,im +PTFB_ECOLI,0.37,0.47,0.48,0.50,0.39,0.76,0.79,im +PTGB_ECOLI,0.58,0.55,0.48,0.50,0.57,0.70,0.74,im +PTHB_ECOLI,0.36,0.47,0.48,0.50,0.51,0.69,0.72,im +PTMA_ECOLI,0.39,0.41,0.48,0.50,0.52,0.72,0.75,im +PTOA_ECOLI,0.35,0.51,0.48,0.50,0.61,0.71,0.74,im +PTTB_ECOLI,0.31,0.44,0.48,0.50,0.50,0.79,0.82,im +RODA_ECOLI,0.61,0.66,0.48,0.50,0.46,0.87,0.88,im +SECE_ECOLI,0.48,0.49,0.48,0.50,0.52,0.77,0.71,im +SECF_ECOLI,0.11,0.50,0.48,0.50,0.58,0.72,0.68,im +SECY_ECOLI,0.31,0.36,0.48,0.50,0.58,0.94,0.94,im +TNAB_ECOLI,0.68,0.51,0.48,0.50,0.71,0.75,0.78,im +XYLE_ECOLI,0.69,0.39,0.48,0.50,0.57,0.76,0.79,im +YCEE_ECOLI,0.52,0.54,0.48,0.50,0.62,0.76,0.79,im +EXBD_ECOLI,0.46,0.59,0.48,0.50,0.36,0.76,0.23,im +FTSL_ECOLI,0.36,0.45,0.48,0.50,0.38,0.79,0.17,im +FTSN_ECOLI,0.00,0.51,0.48,0.50,0.35,0.67,0.44,im +FTSQ_ECOLI,0.10,0.49,0.48,0.50,0.41,0.67,0.21,im +MOTB_ECOLI,0.30,0.51,0.48,0.50,0.42,0.61,0.34,im +TOLA_ECOLI,0.61,0.47,0.48,0.50,0.00,0.80,0.32,im +TOLQ_ECOLI,0.63,0.75,0.48,0.50,0.64,0.73,0.66,im +EMRB_ECOLI,0.71,0.52,0.48,0.50,0.64,1.00,0.99,im +ATKC_ECOLI,0.85,0.53,0.48,0.50,0.53,0.52,0.35,imS +NFRB_ECOLI,0.63,0.49,0.48,0.50,0.54,0.76,0.79,imS 
+NLPA_ECOLI,0.75,0.55,1.00,1.00,0.40,0.47,0.30,imL +CYOA_ECOLI,0.70,0.39,1.00,0.50,0.51,0.82,0.84,imL +ATKA_ECOLI,0.72,0.42,0.48,0.50,0.65,0.77,0.79,imU +BCR_ECOLI,0.79,0.41,0.48,0.50,0.66,0.81,0.83,imU +CADB_ECOLI,0.83,0.48,0.48,0.50,0.65,0.76,0.79,imU +CAIT_ECOLI,0.69,0.43,0.48,0.50,0.59,0.74,0.77,imU +CPXA_ECOLI,0.79,0.36,0.48,0.50,0.46,0.82,0.70,imU +CRED_ECOLI,0.78,0.33,0.48,0.50,0.57,0.77,0.79,imU +CYDB_ECOLI,0.75,0.37,0.48,0.50,0.64,0.70,0.74,imU +CYOB_ECOLI,0.59,0.29,0.48,0.50,0.64,0.75,0.77,imU +CYOE_ECOLI,0.67,0.37,0.48,0.50,0.54,0.64,0.68,imU +DMSC_ECOLI,0.66,0.48,0.48,0.50,0.54,0.70,0.74,imU +DPPB_ECOLI,0.64,0.46,0.48,0.50,0.48,0.73,0.76,imU +DSBD_ECOLI,0.76,0.71,0.48,0.50,0.50,0.71,0.75,imU +FEPD_ECOLI,0.84,0.49,0.48,0.50,0.55,0.78,0.74,imU +FEPG_ECOLI,0.77,0.55,0.48,0.50,0.51,0.78,0.74,imU +FTSH_ECOLI,0.81,0.44,0.48,0.50,0.42,0.67,0.68,imU +GLTS_ECOLI,0.58,0.60,0.48,0.50,0.59,0.73,0.76,imU +KEFC_ECOLI,0.63,0.42,0.48,0.50,0.48,0.77,0.80,imU +KUP_ECOLI,0.62,0.42,0.48,0.50,0.58,0.79,0.81,imU +MCP1_ECOLI,0.86,0.39,0.48,0.50,0.59,0.89,0.90,imU +MCP2_ECOLI,0.81,0.53,0.48,0.50,0.57,0.87,0.88,imU +MCP4_ECOLI,0.87,0.49,0.48,0.50,0.61,0.76,0.79,imU +MELB_ECOLI,0.47,0.46,0.48,0.50,0.62,0.74,0.77,imU +MOTA_ECOLI,0.76,0.41,0.48,0.50,0.50,0.59,0.62,imU +NUPC_ECOLI,0.70,0.53,0.48,0.50,0.70,0.86,0.87,imU +NUPG_ECOLI,0.64,0.45,0.48,0.50,0.67,0.61,0.66,imU +PNTB_ECOLI,0.81,0.52,0.48,0.50,0.57,0.78,0.80,imU +PTKC_ECOLI,0.73,0.26,0.48,0.50,0.57,0.75,0.78,imU +RHAT_ECOLI,0.49,0.61,1.00,0.50,0.56,0.71,0.74,imU +SECD_ECOLI,0.88,0.42,0.48,0.50,0.52,0.73,0.75,imU +SECG_ECOLI,0.84,0.54,0.48,0.50,0.75,0.92,0.70,imU +TEHA_ECOLI,0.63,0.51,0.48,0.50,0.64,0.72,0.76,imU +TYRP_ECOLI,0.86,0.55,0.48,0.50,0.63,0.81,0.83,imU +UHPB_ECOLI,0.79,0.54,0.48,0.50,0.50,0.66,0.68,imU +TONB_ECOLI,0.57,0.38,0.48,0.50,0.06,0.49,0.33,imU +LEP_ECOLI,0.78,0.44,0.48,0.50,0.45,0.73,0.68,imU +FADL_ECOLI,0.78,0.68,0.48,0.50,0.83,0.40,0.29,om +FHUA_ECOLI,0.63,0.69,0.48,0.50,0.65,0.41,0.28,om 
+LAMB_ECOLI,0.67,0.88,0.48,0.50,0.73,0.50,0.25,om +NFRA_ECOLI,0.61,0.75,0.48,0.50,0.51,0.33,0.33,om +NMPC_ECOLI,0.67,0.84,0.48,0.50,0.74,0.54,0.37,om +OMPA_ECOLI,0.74,0.90,0.48,0.50,0.57,0.53,0.29,om +OMPC_ECOLI,0.73,0.84,0.48,0.50,0.86,0.58,0.29,om +OMPF_ECOLI,0.75,0.76,0.48,0.50,0.83,0.57,0.30,om +OMPX_ECOLI,0.77,0.57,0.48,0.50,0.88,0.53,0.20,om +PHOE_ECOLI,0.74,0.78,0.48,0.50,0.75,0.54,0.15,om +TSX_ECOLI,0.68,0.76,0.48,0.50,0.84,0.45,0.27,om +BTUB_ECOLI,0.56,0.68,0.48,0.50,0.77,0.36,0.45,om +CIRA_ECOLI,0.65,0.51,0.48,0.50,0.66,0.54,0.33,om +FECA_ECOLI,0.52,0.81,0.48,0.50,0.72,0.38,0.38,om +FEPA_ECOLI,0.64,0.57,0.48,0.50,0.70,0.33,0.26,om +FHUE_ECOLI,0.60,0.76,1.00,0.50,0.77,0.59,0.52,om +OMPP_ECOLI,0.69,0.59,0.48,0.50,0.77,0.39,0.21,om +OMPT_ECOLI,0.63,0.49,0.48,0.50,0.79,0.45,0.28,om +TOLC_ECOLI,0.71,0.71,0.48,0.50,0.68,0.43,0.36,om +PA1_ECOLI,0.68,0.63,0.48,0.50,0.73,0.40,0.30,om +MULI_ECOLI,0.77,0.57,1.00,0.50,0.37,0.54,0.01,omL +NLPB_ECOLI,0.66,0.49,1.00,0.50,0.54,0.56,0.36,omL +NLPE_ECOLI,0.71,0.46,1.00,0.50,0.52,0.59,0.30,omL +PAL_ECOLI,0.67,0.55,1.00,0.50,0.66,0.58,0.16,omL +SLP_ECOLI,0.68,0.49,1.00,0.50,0.62,0.55,0.28,omL +AGP_ECOLI,0.74,0.49,0.48,0.50,0.42,0.54,0.36,pp +AMY1_ECOLI,0.70,0.61,0.48,0.50,0.56,0.52,0.43,pp +ARAF_ECOLI,0.66,0.86,0.48,0.50,0.34,0.41,0.36,pp +ASG2_ECOLI,0.73,0.78,0.48,0.50,0.58,0.51,0.31,pp +BGLX_ECOLI,0.65,0.57,0.48,0.50,0.47,0.47,0.51,pp +C562_ECOLI,0.72,0.86,0.48,0.50,0.17,0.55,0.21,pp +CN16_ECOLI,0.67,0.70,0.48,0.50,0.46,0.45,0.33,pp +CYPH_ECOLI,0.67,0.81,0.48,0.50,0.54,0.49,0.23,pp +CYSP_ECOLI,0.67,0.61,0.48,0.50,0.51,0.37,0.38,pp +DGAL_ECOLI,0.63,1.00,0.48,0.50,0.35,0.51,0.49,pp +DPPA_ECOLI,0.57,0.59,0.48,0.50,0.39,0.47,0.33,pp +DSBA_ECOLI,0.71,0.71,0.48,0.50,0.40,0.54,0.39,pp +DSBC_ECOLI,0.66,0.74,0.48,0.50,0.31,0.38,0.43,pp +ECOT_ECOLI,0.67,0.81,0.48,0.50,0.25,0.42,0.25,pp +ECPD_ECOLI,0.64,0.72,0.48,0.50,0.49,0.42,0.19,pp +FECB_ECOLI,0.68,0.82,0.48,0.50,0.38,0.65,0.56,pp +FECR_ECOLI,0.32,0.39,0.48,0.50,0.53,0.28,0.38,pp 
+FEPB_ECOLI,0.70,0.64,0.48,0.50,0.47,0.51,0.47,pp +FIMC_ECOLI,0.63,0.57,0.48,0.50,0.49,0.70,0.20,pp +GGT_ECOLI,0.74,0.82,0.48,0.50,0.49,0.49,0.41,pp +GLNH_ECOLI,0.63,0.86,0.48,0.50,0.39,0.47,0.34,pp +GLPQ_ECOLI,0.63,0.83,0.48,0.50,0.40,0.39,0.19,pp +HTRA_ECOLI,0.63,0.71,0.48,0.50,0.60,0.40,0.39,pp +LIVJ_ECOLI,0.71,0.86,0.48,0.50,0.40,0.54,0.32,pp +LIVK_ECOLI,0.68,0.78,0.48,0.50,0.43,0.44,0.42,pp +MALE_ECOLI,0.64,0.84,0.48,0.50,0.37,0.45,0.40,pp +MALM_ECOLI,0.74,0.47,0.48,0.50,0.50,0.57,0.42,pp +MEPA_ECOLI,0.75,0.84,0.48,0.50,0.35,0.52,0.33,pp +MODA_ECOLI,0.63,0.65,0.48,0.50,0.39,0.44,0.35,pp +NRFA_ECOLI,0.69,0.67,0.48,0.50,0.30,0.39,0.24,pp +NRFF_ECOLI,0.70,0.71,0.48,0.50,0.42,0.84,0.85,pp +OPPA_ECOLI,0.69,0.80,0.48,0.50,0.46,0.57,0.26,pp +OSMY_ECOLI,0.64,0.66,0.48,0.50,0.41,0.39,0.20,pp +POTD_ECOLI,0.63,0.80,0.48,0.50,0.46,0.31,0.29,pp +POTF_ECOLI,0.66,0.71,0.48,0.50,0.41,0.50,0.35,pp +PPA_ECOLI,0.69,0.59,0.48,0.50,0.46,0.44,0.52,pp +PPB_ECOLI,0.68,0.67,0.48,0.50,0.49,0.40,0.34,pp +PROX_ECOLI,0.64,0.78,0.48,0.50,0.50,0.36,0.38,pp +PSTS_ECOLI,0.62,0.78,0.48,0.50,0.47,0.49,0.54,pp +PTR_ECOLI,0.76,0.73,0.48,0.50,0.44,0.39,0.39,pp +RBSB_ECOLI,0.64,0.81,0.48,0.50,0.37,0.39,0.44,pp +SPEA_ECOLI,0.29,0.39,0.48,0.50,0.52,0.40,0.48,pp +SUBI_ECOLI,0.62,0.83,0.48,0.50,0.46,0.36,0.40,pp +TBPA_ECOLI,0.56,0.54,0.48,0.50,0.43,0.37,0.30,pp +TESA_ECOLI,0.69,0.66,0.48,0.50,0.41,0.50,0.25,pp +TOLB_ECOLI,0.69,0.65,0.48,0.50,0.63,0.48,0.41,pp +TORA_ECOLI,0.43,0.59,0.48,0.50,0.52,0.49,0.56,pp +TREA_ECOLI,0.74,0.56,0.48,0.50,0.47,0.68,0.30,pp +UGPB_ECOLI,0.71,0.57,0.48,0.50,0.48,0.35,0.32,pp +USHA_ECOLI,0.61,0.60,0.48,0.50,0.44,0.39,0.38,pp +XYLF_ECOLI,0.59,0.61,0.48,0.50,0.42,0.42,0.37,pp +YTFQ_ECOLI,0.74,0.74,0.48,0.50,0.31,0.53,0.52,pp diff --git a/pytorch_widedeep/metrics.py b/pytorch_widedeep/metrics.py index 2486eb13ab8fa562becdc0c930deffa66a63a6ea..8825fe626a2fe27a3b9c9b50ff0e8da6d83fea65 100644 --- a/pytorch_widedeep/metrics.py +++ b/pytorch_widedeep/metrics.py @@ -38,10 +38,7 @@ 
class MultipleMetrics(object): if isinstance(metric, Metric): logs[self.prefix + metric._name] = metric(y_pred, y_true) if isinstance(metric, TorchMetric): - if metric.num_classes == 2: - metric.update(torch.round(y_pred).int(), y_true.int()) - if metric.num_classes > 2: # type: ignore[operator] - metric.update(torch.max(y_pred, dim=1).indices, y_true.int()) # type: ignore[attr-defined] + metric.update(y_pred, y_true.int()) # type: ignore[attr-defined] logs[self.prefix + type(metric).__name__] = ( metric.compute().detach().cpu().numpy() ) diff --git a/pytorch_widedeep/training/trainer.py b/pytorch_widedeep/training/trainer.py index 4d32c829cff0c7a45fa961941306ad1b38c8f116..2a3bd564fdb8140a080209dd124ece3abb4b8545 100644 --- a/pytorch_widedeep/training/trainer.py +++ b/pytorch_widedeep/training/trainer.py @@ -149,8 +149,8 @@ class Trainer: - List of objects of type :obj:`torchmetrics.Metric`. This can be any metric from torchmetrics library `Examples `_. This can also be a custom metric as - long as it is an object of type :obj:`Metric`. See `the instructions + classification-metrics>`_. This can also be a custom metric as long as + it is an object of type :obj:`Metric`. See `the instructions `_. class_weight: float, List or Tuple. optional. 
default=None - float indicating the weight of the minority class in binary classification diff --git a/pytorch_widedeep/version.py b/pytorch_widedeep/version.py index 66c607f6d82c50015033cd3de8bd680298605129..b19b12ea3abb417f521fac58cf86c65032f1e659 100644 --- a/pytorch_widedeep/version.py +++ b/pytorch_widedeep/version.py @@ -1 +1 @@ -__version__ = "1.0.13" +__version__ = "1.0.14" diff --git a/tests/test_datasets/test_datasets.py b/tests/test_datasets/test_datasets.py index acfca3c231fd2cd615adef0518f469043ecd6ef3..7c0a8bb8ca3e870fd2cfa29e6502962affeea7ff 100644 --- a/tests/test_datasets/test_datasets.py +++ b/tests/test_datasets/test_datasets.py @@ -2,7 +2,7 @@ import numpy as np import pandas as pd import pytest -from pytorch_widedeep.datasets import load_adult, load_bio_kdd04 +from pytorch_widedeep.datasets import load_adult, load_bio_kdd04, load_ecoli @pytest.mark.parametrize( @@ -33,3 +33,18 @@ def test_load_adult(as_frame): assert (df.shape, type(df)) == ((48842, 15), pd.DataFrame) else: assert (df.shape, type(df)) == ((48842, 15), np.ndarray) + + +@pytest.mark.parametrize( + "as_frame", + [ + (True), + (False), + ], +) +def test_load_ecoli(as_frame): + df = load_ecoli(as_frame=as_frame) + if as_frame: + assert (df.shape, type(df)) == ((336, 9), pd.DataFrame) + else: + assert (df.shape, type(df)) == ((336, 9), np.ndarray) diff --git a/tests/test_metrics/test_torchmetrics.py b/tests/test_metrics/test_torchmetrics.py index a5bd69ae2aa375db8df16f493a4e21100799d978..2fcaa5c0c545c40a1c1fd0a872a68f77c6689755 100644 --- a/tests/test_metrics/test_torchmetrics.py +++ b/tests/test_metrics/test_torchmetrics.py @@ -30,11 +30,11 @@ y_pred_bin_pt = torch.from_numpy(y_pred_bin_np) @pytest.mark.parametrize( "metric_name, sklearn_metric, torch_metric", [ - ("Accuracy", accuracy_score, Accuracy(num_classes=2)), - ("Precision", precision_score, Precision(num_classes=2, average="none")), - ("Recall", recall_score, Recall(num_classes=2, average="none")), - ("F1", f1_score, 
F1(num_classes=2, average="none")), - ("FBeta", f2_score_bin, FBeta(beta=2, num_classes=2, average="none")), + ("Accuracy", accuracy_score, Accuracy()), + ("Precision", precision_score, Precision()), + ("Recall", recall_score, Recall()), + ("F1", f1_score, F1()), + ("FBeta", f2_score_bin, FBeta(beta=2)), ], ) def test_binary_metrics(metric_name, sklearn_metric, torch_metric): @@ -58,8 +58,8 @@ y_pred_muli_np = np.array( [0.1, 0.1, 0.8], [0.1, 0.6, 0.3], [0.1, 0.8, 0.1], - [0.1, 0.6, 0.6], - [0.2, 0.6, 0.8], + [0.1, 0.3, 0.6], + [0.1, 0.1, 0.8], [0.6, 0.1, 0.3], [0.7, 0.2, 0.1], [0.1, 0.7, 0.2],