Here, we want to model the relationship between the dependent variable and the independent variable. In linear regression with one variable, *we only have **one** independent variable*.
* Independent variable: 'RM'
* Dependent variable: 'MEDV'
In simple words, we want to **predict** the median value of owner-occupied homes in $1000's (the target attribute, MEDV) based on the average number of rooms per dwelling (RM).
### Plot dependency on one variable (linear regression with one variable)
Here we want to plot MEDV against RM, i.e., visualize how MEDV changes as RM changes. Basically we have $MEDV=f(RM)$ and we want to estimate the function $f(\cdot)$ using linear regression.
"""
# Visual representation of training data
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
...
ax.set_xlabel('RM')
ax.set_ylabel('MEDV')
plt.show()
"""### Split train/test data and labels for the linear-regression-with-one-variable experiments
We can use two approaches to access the columns:
1. **Pop command:** It returns an item and drops it from the frame. After using trainDataset.pop('RM'), the 'RM' column does not exist in the trainDataset frame anymore!
2. Using the **indexing with labels**. Example trainDataset['RM']
We use approach **(2)**.
"""
# The pop command returns an item and drops it from the frame.
# After using trainDataset.pop('RM'), the 'RM' column
# no longer exists in the trainDataset frame!
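As a small, self-contained illustration of the difference between the two approaches (the tiny DataFrame below is made up for this sketch, not the actual dataset):

```python
# Sketch: pop() removes the column; label indexing leaves the frame intact.
import pandas as pd

trainDataset = pd.DataFrame({'RM': [6.5, 5.9], 'MEDV': [24.0, 21.6]})

# Approach (1): pop returns the column AND drops it from the frame.
popped = trainDataset.copy()
rm_col = popped.pop('RM')
print('RM' in popped.columns)        # the column is gone from the frame

# Approach (2): label indexing returns the column; the frame is unchanged.
rm_series = trainDataset['RM']
print('RM' in trainDataset.columns)  # the column is still there
```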
*Feature scaling is not needed for simple linear regression (linear regression with one variable).*
1. **Standardization**: Standardizing the features to be centered at 0 with a standard deviation of 1. Assume we have features with different units; then, purely because of their different scales, they do not contribute equally to the analysis and can create misleading results. Formula: $\hat{X}=\frac{X-\mu}{\sigma}$
2. **Normalization**: Normalization aims to put the values of different features to a common scale (usually [0,1] or [-1,1]). This is used when features have different ranges but the same units. **Example**: Assume we have an RGB image. *Each channel has a different range but all channels have the same units: image pixel*! Formula: $\hat{X}=\frac{X-X_{min}}{X_{max}-X_{min}}$
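The two formulas can be sketched with NumPy (the sample values below are made up for illustration):

```python
# Sketch of both feature-scaling formulas on a toy 1-D feature.
import numpy as np

X = np.array([10.0, 20.0, 30.0, 40.0])

# Standardization: (X - mu) / sigma  ->  mean 0, standard deviation 1
X_std = (X - X.mean()) / X.std()

# Min-max normalization: (X - X_min) / (X_max - X_min)  ->  range [0, 1]
X_norm = (X - X.min()) / (X.max() - X.min())

print(X_std.mean(), X_std.std())   # approximately 0 and 1
print(X_norm.min(), X_norm.max())  # 0.0 and 1.0
```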
### Create Model
1. Define the architecture of the model
2. Define the optimizer
3. Compile the model and return the graph
Assume we desire to find the parameters (**W**) that predict the
output y from x in a linear fashion:
$y = w_1 x + w_0$
The above can be defined with the following dense layer:
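A minimal sketch of such a model in Keras (the optimizer and learning rate below are illustrative choices, not prescribed by the text): a single `Dense` unit with one input learns exactly two parameters, the kernel $w_1$ and the bias $w_0$.

```python
# Sketch: one Dense unit with a bias implements y = w1 * x + w0.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(units=1, input_shape=(1,))  # kernel w1, bias w0
])
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss='mse')
model.summary()  # shows 2 trainable parameters
```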