Restructure Pandas gradebook examples

66c2f3f8 · Bryan W. Weber · e0190ed5 · 66c2f3f8 · 66c2f3f8 · 66c2f3f8
10 changed files
--- a/pandas-gradebook-project/loading-the-data/gradebook.py
+++ b/pandas-gradebook-project/loading-the-data/gradebook.py
@@ -12,22 +12,24 @@ from pathlib import Path
 import pandas as pd
 HERE = Path(__file__).parent
-DATA_FOLDER = HERE.parent / "data"
+DATA_FOLDER = HERE / "data"
 roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
    converters={"NetID": str.lower, "Email Address": str.lower},
+    usecols=["Section", "Email Address", "NetID"],
+    index_col="NetID",
 )
 # print(
-#     roster.loc[
+#     roster.loc[["wxb12345", "mxl12345", "txj12345", "jgf12345"]].to_markdown()
-#         roster["ID"].isin([1234567, 2345678, 3456789, 4567890])
-#     ].to_markdown()
 # )
 hw_exam_grades = pd.read_csv(
    DATA_FOLDER / "hw_exam_grades.csv",
    converters={"SID": str.lower, "Email Address": str.lower},
    usecols=lambda x: "Submission" not in x,
+    index_col="SID",
 )
 # print(
 #     hw_exam_grades.loc[

--- a/pandas-gradebook-project/merging-dataframes/gradebook.py
+++ b/pandas-gradebook-project/merging-dataframes/gradebook.py
@@ -12,7 +12,7 @@ from pathlib import Path
 import pandas as pd
 HERE = Path(__file__).parent
-DATA_FOLDER = HERE.parent / "data"
+DATA_FOLDER = HERE / "data"
 roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",

--- a/pandas-gradebook-project/calculating-grades/gradebook.py
+++ b/pandas-gradebook-project/calculating-grades/gradebook.py
@@ -13,7 +13,7 @@ import pandas as pd
 import numpy as np
 HERE = Path(__file__).parent
-DATA_FOLDER = HERE.parent / "data"
+DATA_FOLDER = HERE / "data"
 roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
@@ -83,24 +83,24 @@ final_data["Total Homework"] = sum_of_hw_scores / sum_of_hw_max
 # )
 hw_max_renamed = homework_max_points.set_axis(homework_scores.columns, axis=1)
-overall_hw_scores = (homework_scores / hw_max_renamed).sum(axis=1)
+average_hw_scores = (homework_scores / hw_max_renamed).sum(axis=1)
-final_data["Overall Homework"] = overall_hw_scores / homework_scores.shape[1]
+final_data["Average Homework"] = average_hw_scores / homework_scores.shape[1]
 # print(
-#     pd.concat([overall_hw_scores, final_data["Overall Homework"]], axis=1)
+#     pd.concat([average_hw_scores, final_data["Average Homework"]], axis=1)
-#     .set_axis(["Sum of Overall Homework Scores", "Overall Homework"], axis=1)
+#     .set_axis(["Sum of Average Homework Scores", "Average Homework"], axis=1)
 #     .loc[["wxb12345", "mxl12345", "txj12345", "jgf12345"]]
 #     .to_markdown()
 # )
 final_data["Homework Score"] = final_data[
-    ["Total Homework", "Overall Homework"]
+    ["Total Homework", "Average Homework"]
 ].max(axis=1)
 # print(
 #     final_data.loc[
 #         ["wxb12345", "mxl12345", "txj12345", "jgf12345"],
-#         ["Total Homework", "Overall Homework", "Homework Score"],
+#         ["Total Homework", "Average Homework", "Homework Score"],
 #     ].to_markdown()
 # )
@@ -113,17 +113,17 @@ sum_of_quiz_scores = quiz_scores.sum(axis=1)
 sum_of_quiz_max = quiz_max_points.sum()
 final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
-overall_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
+average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
-final_data["Overall Quizzes"] = overall_quiz_scores / quiz_scores.shape[1]
+final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
 final_data["Quiz Score"] = final_data[
-    ["Total Quizzes", "Overall Quizzes"]
+    ["Total Quizzes", "Average Quizzes"]
 ].max(axis=1)
 # print(
 #     final_data.loc[
 #         ["wxb12345", "mxl12345", "txj12345", "jgf12345"],
-#         ["Total Quizzes", "Overall Quizzes", "Quiz Score"],
+#         ["Total Quizzes", "Average Quizzes", "Quiz Score"],
 #     ].to_markdown()
 # )

--- a/pandas-gradebook-project/grouping-the-data/gradebook.py
+++ b/pandas-gradebook-project/grouping-the-data/gradebook.py
@@ -13,7 +13,7 @@ import pandas as pd
 import numpy as np
 HERE = Path(__file__).parent
-DATA_FOLDER = HERE.parent / "data"
+DATA_FOLDER = HERE / "data"
 roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",
@@ -126,5 +126,5 @@ final_data["Final Grade"] = pd.Categorical(
 for section, table in final_data.groupby("Section"):
    print(f"In Section {section} there are {table.shape[0]} students.")
    table.sort_values(by=["Last Name", "First Name"]).to_csv(
-        DATA_FOLDER / "Section {section} Grades.csv"
+        DATA_FOLDER / f"Section {section} Grades.csv"
    )
--- a/pandas-gradebook-project/gradebook.py
+++ b/pandas-gradebook-project/gradebook.py
--- a/pandas-gradebook-project/plotting-summary-statistics/gradebook.py
+++ b/pandas-gradebook-project/plotting-summary-statistics/gradebook.py
@@ -14,7 +14,7 @@ import numpy as np
 import matplotlib.pyplot as plt
 HERE = Path(__file__).parent
-DATA_FOLDER = HERE.parent / "data"
+DATA_FOLDER = HERE / "data"
 roster = pd.read_csv(
    DATA_FOLDER / "roster.csv",

--- a/pandas-gradebook-project/README.md
+++ b/pandas-gradebook-project/README.md
 # Pandas Project: Make a Gradebook With Pandas
-The code in this folder is used to generate and process the data for the tutorial [_Pandas Project: Make a Gradebook With Pandas_](https://realpython.com/pandas-project-gradebook).
+The code in this folder is used to generate and process the data for the tutorial [_Pandas Project: Make a Gradebook With Pandas_](https://realpython.com/pandas-project-gradebook).
 ## `generate_data.py`
 The script `generate_data.py` uses the [Faker](https://faker.readthedocs.io/en/master/) library to generate fake student names and NumPy to generate scores for homework, exams, and quizzes. The data are stored as CSV files in the `data` folder. With the seed that is set in the script, the data used in the article can be reproduced. To try out new data, change the seed for the NumPy random number generator.
-## Folders with `gradebook.py` Scripts
+## Numbered Python Scripts
-In the article, you create a script called `gradebook.py`. For didactic purposes, each of the steps in the development process is broken into a separate folder here. The `gradebook.py` scripts build from one folder to the next, to generate the final copy in the root directory here. The order in the article is:
+In the article, you create a script called `gradebook.py`. For didactic purposes, each of the steps in the development process is broken into a separate script here. The `.py` scripts build from one example to the next, to generate the final copy called `06-final-gradebook.py`. The order in the article is:
-1. `loading-the-data`
+1. `01-loading-the-data.py`
-2. `merging-dataframes`
+2. `02-merging-dataframes.py`
-3. `calculating-grades`
+3. `03-calculating-grades.py`
-4. `grouping-the-data`
+4. `04-grouping-the-data.py`
-5. `plotting-summary-statistics`
+5. `05-plotting-summary-statistics.py`
 ## Installing Dependencies
-There are two `requirements.txt`-style files in this repository. The `generate_data-reqs.txt` contains the dependencies for the `generate_data.py` script. The `gradebook-reqs.txt` file contains the dependencies for the `gradebook.py` script.
+There are two `requirements.txt` files in this repository. The `data/requirements.txt` file contains the dependencies for the `generate_data.py` script. The `requirements.txt` file in the root folder contains the dependencies for the `gradebook.py` script.
--- a/pandas-gradebook-project/generate_data.py
+++ b/pandas-gradebook-project/generate_data.py
--- a/pandas-gradebook-project/generate_data-reqs.txt
+++ b/pandas-gradebook-project/generate_data-reqs.txt
--- a/pandas-gradebook-project/gradebook-reqs.txt
+++ b/pandas-gradebook-project/gradebook-reqs.txt