From 5f5bb84b88ac0893d49929fed244edb290344379 Mon Sep 17 00:00:00 2001
From: jrzaurin <jrzaurin@gmail.com>
Date: Mon, 13 Jul 2020 10:37:17 +0100
Subject: [PATCH] minor adjustment to setup.py

---
 docs/pypiREADME.md | 161 ---------------------------------------------
 setup.py           |   2 +-
 2 files changed, 1 insertion(+), 162 deletions(-)
 delete mode 100644 docs/pypiREADME.md

diff --git a/docs/pypiREADME.md b/docs/pypiREADME.md
deleted file mode 100644
index 2df198b..0000000
--- a/docs/pypiREADME.md
+++ /dev/null
@@ -1,161 +0,0 @@
-# pytorch-widedeep
-
-A flexible package to combine tabular data with text and images using wide and
-deep models.
-
-### Introduction
-
-`pytorch-widedeep` is based on Google's Wide and Deep Algorithm. Details of
-the original algorithm can be found
-[here](https://www.tensorflow.org/tutorials/wide_and_deep), and the nice
-research paper can be found [here](https://arxiv.org/abs/1606.07792).
-
-In general terms, `pytorch-widedeep` is a package to use deep learning with
-tabular data. In particular, is intended to facilitate the combination of text
-and images with corresponding tabular data using wide and deep models. With
-that in mind there are two architectures that can be implemented with just a
-few lines of code. For details on these architectures please visit the
-[repo](https://github.com/jrzaurin/pytorch-widedeep).
-
-### Installation
-
-Install using pip:
-
-```bash
-pip install pytorch-widedeep
-```
-
-Or install directly from github
-
-```bash
-pip install git+https://github.com/jrzaurin/pytorch-widedeep.git
-```
-
-#### Developer Install
-
-```bash
-# Clone the repository
-git clone https://github.com/jrzaurin/pytorch-widedeep
-cd pytorch-widedeep
-
-# Install in dev mode
-pip install -e .
-```
-
-### Examples
-
-There are a number of notebooks in the `examples` folder plus some additional
-files. These notebooks cover most of the utilities of this package and can
-also act as documentation. In the case that github does not render the
-notebooks, or it renders them missing some parts, they are saved as markdown
-files in the `docs` folder.
-
-### Quick start
-
-Binary classification with the [adult
-dataset]([adult](https://www.kaggle.com/wenruliu/adult-income-dataset))
-using `Wide` and `DeepDense` and defaults settings.
-
-```python
-import pandas as pd
-from sklearn.model_selection import train_test_split
-
-from pytorch_widedeep.preprocessing import WidePreprocessor, DeepPreprocessor
-from pytorch_widedeep.models import Wide, DeepDense, WideDeep
-from pytorch_widedeep.metrics import BinaryAccuracy
-
-# these next 4 lines are not directly related to pytorch-widedeep. I assume
-# you have downloaded the dataset and place it in a dir called data/adult/
-df = pd.read_csv("data/adult/adult.csv.zip")
-df["income_label"] = (df["income"].apply(lambda x: ">50K" in x)).astype(int)
-df.drop("income", axis=1, inplace=True)
-df_train, df_test = train_test_split(df, test_size=0.2, stratify=df.income_label)
-
-# prepare wide, crossed, embedding and continuous columns
-wide_cols = [
-    "education",
-    "relationship",
-    "workclass",
-    "occupation",
-    "native-country",
-    "gender",
-]
-cross_cols = [("education", "occupation"), ("native-country", "occupation")]
-embed_cols = [
-    ("education", 16),
-    ("workclass", 16),
-    ("occupation", 16),
-    ("native-country", 32),
-]
-cont_cols = ["age", "hours-per-week"]
-target_col = "income_label"
-
-# target
-target = df_train[target_col].values
-
-# wide
-preprocess_wide = WidePreprocessor(wide_cols=wide_cols, crossed_cols=cross_cols)
-X_wide = preprocess_wide.fit_transform(df_train)
-wide = Wide(wide_dim=X_wide.shape[1], output_dim=1)
-
-# deepdense
-preprocess_deep = DeepPreprocessor(embed_cols=embed_cols, continuous_cols=cont_cols)
-X_deep = preprocess_deep.fit_transform(df_train)
-deepdense = DeepDense(
-    hidden_layers=[64, 32],
-    deep_column_idx=preprocess_deep.deep_column_idx,
-    embed_input=preprocess_deep.embeddings_input,
-    continuous_cols=cont_cols,
-)
-
-# build, compile and fit
-model = WideDeep(wide=wide, deepdense=deepdense)
-model.compile(method="binary", metrics=[BinaryAccuracy])
-model.fit(
-    X_wide=X_wide,
-    X_deep=X_deep,
-    target=target,
-    n_epochs=5,
-    batch_size=256,
-    val_split=0.1,
-)
-
-# predict
-X_wide_te = preprocess_wide.transform(df_test)
-X_deep_te = preprocess_deep.transform(df_test)
-preds = model.predict(X_wide=X_wide_te, X_deep=X_deep_te)
-```
-
-Of course, one can do much more, such as using different initializations,
-optimizers or learning rate schedulers for each component of the overall
-model. Adding FC-Heads to the Text and Image components. Using the [Focal
-Loss](https://arxiv.org/abs/1708.02002), warming up individual components
-before joined training, etc. See the `examples` or the `docs` folders for a
-better understanding of the content of the package and its functionalities.
-
-### Testing
-
-```
-pytest tests
-```
-
-### Acknowledgments
-
-This library takes from a series of other libraries, so I think it is just
-fair to mention them here in the README (specific mentions are also included
-in the code).
-
-The `Callbacks` and `Initializers` structure and code is inspired by the
-[`torchsample`](https://github.com/ncullen93/torchsample) library, which in
-itself partially inspired by [`Keras`](https://keras.io/).
-
-The `TextProcessor` class in this library uses the
-[`fastai`](https://docs.fast.ai/text.transform.html#BaseTokenizer.tokenizer)'s
-`Tokenizer` and `Vocab`. The code at `utils.fastai_transforms` is a minor
-adaptation of their code so it functions within this library. To my experience
-their `Tokenizer` is the best in class.
-
-The `ImageProcessor` class in this library uses code from the fantastic [Deep
-Learning for Computer
-Vision](https://www.pyimagesearch.com/deep-learning-computer-vision-python-book/)
-(DL4CV) book by Adrian Rosebrock.
\ No newline at end of file
diff --git a/setup.py b/setup.py
index 2149a82..61c24f8 100644
--- a/setup.py
+++ b/setup.py
@@ -26,7 +26,7 @@ setup_kwargs = {
     "name": "pytorch-widedeep",
     "version": version,
     "description": "Combine tabular data with text and images using Wide and Deep models in Pytorch",
-    "long_description": open("docs/pypiREADME.md", "r", encoding="utf-8").read(),
+    "long_description": open("pypi_README.md", "r", encoding="utf-8").read(),
     "long_description_content_type": "text/markdown",
     # "long_description": long_description,
     "author": "Javier Rodriguez Zaurin",
-- 
GitLab