Restructure Pandas gradebook examples

上级 e0190ed5
...@@ -12,22 +12,24 @@ from pathlib import Path ...@@ -12,22 +12,24 @@ from pathlib import Path
import pandas as pd import pandas as pd
HERE = Path(__file__).parent HERE = Path(__file__).parent
DATA_FOLDER = HERE.parent / "data" DATA_FOLDER = HERE / "data"
roster = pd.read_csv( roster = pd.read_csv(
DATA_FOLDER / "roster.csv", DATA_FOLDER / "roster.csv",
converters={"NetID": str.lower, "Email Address": str.lower}, converters={"NetID": str.lower, "Email Address": str.lower},
usecols=["Section", "Email Address", "NetID"],
index_col="NetID",
) )
# print( # print(
# roster.loc[ # roster.loc[["wxb12345", "mxl12345", "txj12345", "jgf12345"]].to_markdown()
# roster["ID"].isin([1234567, 2345678, 3456789, 4567890])
# ].to_markdown()
# ) # )
hw_exam_grades = pd.read_csv( hw_exam_grades = pd.read_csv(
DATA_FOLDER / "hw_exam_grades.csv", DATA_FOLDER / "hw_exam_grades.csv",
converters={"SID": str.lower, "Email Address": str.lower}, converters={"SID": str.lower, "Email Address": str.lower},
usecols=lambda x: "Submission" not in x, usecols=lambda x: "Submission" not in x,
index_col="SID",
) )
# print( # print(
# hw_exam_grades.loc[ # hw_exam_grades.loc[
......
...@@ -12,7 +12,7 @@ from pathlib import Path ...@@ -12,7 +12,7 @@ from pathlib import Path
import pandas as pd import pandas as pd
HERE = Path(__file__).parent HERE = Path(__file__).parent
DATA_FOLDER = HERE.parent / "data" DATA_FOLDER = HERE / "data"
roster = pd.read_csv( roster = pd.read_csv(
DATA_FOLDER / "roster.csv", DATA_FOLDER / "roster.csv",
......
...@@ -13,7 +13,7 @@ import pandas as pd ...@@ -13,7 +13,7 @@ import pandas as pd
import numpy as np import numpy as np
HERE = Path(__file__).parent HERE = Path(__file__).parent
DATA_FOLDER = HERE.parent / "data" DATA_FOLDER = HERE / "data"
roster = pd.read_csv( roster = pd.read_csv(
DATA_FOLDER / "roster.csv", DATA_FOLDER / "roster.csv",
...@@ -83,24 +83,24 @@ final_data["Total Homework"] = sum_of_hw_scores / sum_of_hw_max ...@@ -83,24 +83,24 @@ final_data["Total Homework"] = sum_of_hw_scores / sum_of_hw_max
# ) # )
hw_max_renamed = homework_max_points.set_axis(homework_scores.columns, axis=1) hw_max_renamed = homework_max_points.set_axis(homework_scores.columns, axis=1)
overall_hw_scores = (homework_scores / hw_max_renamed).sum(axis=1) average_hw_scores = (homework_scores / hw_max_renamed).sum(axis=1)
final_data["Overall Homework"] = overall_hw_scores / homework_scores.shape[1] final_data["Average Homework"] = average_hw_scores / homework_scores.shape[1]
# print( # print(
# pd.concat([overall_hw_scores, final_data["Overall Homework"]], axis=1) # pd.concat([average_hw_scores, final_data["Average Homework"]], axis=1)
# .set_axis(["Sum of Overall Homework Scores", "Overall Homework"], axis=1) # .set_axis(["Sum of Average Homework Scores", "Average Homework"], axis=1)
# .loc[["wxb12345", "mxl12345", "txj12345", "jgf12345"]] # .loc[["wxb12345", "mxl12345", "txj12345", "jgf12345"]]
# .to_markdown() # .to_markdown()
# ) # )
final_data["Homework Score"] = final_data[ final_data["Homework Score"] = final_data[
["Total Homework", "Overall Homework"] ["Total Homework", "Average Homework"]
].max(axis=1) ].max(axis=1)
# print( # print(
# final_data.loc[ # final_data.loc[
# ["wxb12345", "mxl12345", "txj12345", "jgf12345"], # ["wxb12345", "mxl12345", "txj12345", "jgf12345"],
# ["Total Homework", "Overall Homework", "Homework Score"], # ["Total Homework", "Average Homework", "Homework Score"],
# ].to_markdown() # ].to_markdown()
# ) # )
...@@ -113,17 +113,17 @@ sum_of_quiz_scores = quiz_scores.sum(axis=1) ...@@ -113,17 +113,17 @@ sum_of_quiz_scores = quiz_scores.sum(axis=1)
sum_of_quiz_max = quiz_max_points.sum() sum_of_quiz_max = quiz_max_points.sum()
final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max final_data["Total Quizzes"] = sum_of_hw_scores / sum_of_hw_max
overall_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1) average_quiz_scores = (quiz_scores / quiz_max_points).sum(axis=1)
final_data["Overall Quizzes"] = overall_quiz_scores / quiz_scores.shape[1] final_data["Average Quizzes"] = average_quiz_scores / quiz_scores.shape[1]
final_data["Quiz Score"] = final_data[ final_data["Quiz Score"] = final_data[
["Total Quizzes", "Overall Quizzes"] ["Total Quizzes", "Average Quizzes"]
].max(axis=1) ].max(axis=1)
# print( # print(
# final_data.loc[ # final_data.loc[
# ["wxb12345", "mxl12345", "txj12345", "jgf12345"], # ["wxb12345", "mxl12345", "txj12345", "jgf12345"],
# ["Total Quizzes", "Overall Quizzes", "Quiz Score"], # ["Total Quizzes", "Average Quizzes", "Quiz Score"],
# ].to_markdown() # ].to_markdown()
# ) # )
......
...@@ -13,7 +13,7 @@ import pandas as pd ...@@ -13,7 +13,7 @@ import pandas as pd
import numpy as np import numpy as np
HERE = Path(__file__).parent HERE = Path(__file__).parent
DATA_FOLDER = HERE.parent / "data" DATA_FOLDER = HERE / "data"
roster = pd.read_csv( roster = pd.read_csv(
DATA_FOLDER / "roster.csv", DATA_FOLDER / "roster.csv",
...@@ -126,5 +126,5 @@ final_data["Final Grade"] = pd.Categorical( ...@@ -126,5 +126,5 @@ final_data["Final Grade"] = pd.Categorical(
for section, table in final_data.groupby("Section"): for section, table in final_data.groupby("Section"):
print(f"In Section {section} there are {table.shape[0]} students.") print(f"In Section {section} there are {table.shape[0]} students.")
table.sort_values(by=["Last Name", "First Name"]).to_csv( table.sort_values(by=["Last Name", "First Name"]).to_csv(
DATA_FOLDER / "Section {section} Grades.csv" DATA_FOLDER / f"Section {section} Grades.csv"
) )
...@@ -14,7 +14,7 @@ import numpy as np ...@@ -14,7 +14,7 @@ import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
HERE = Path(__file__).parent HERE = Path(__file__).parent
DATA_FOLDER = HERE.parent / "data" DATA_FOLDER = HERE / "data"
roster = pd.read_csv( roster = pd.read_csv(
DATA_FOLDER / "roster.csv", DATA_FOLDER / "roster.csv",
......
# Pandas Project: Make a Gradebook With Pandas # Pandas Project: Make a Gradebook With Pandas
The code in this folder is used to generate and process the data for the tutorial [_Pandas Project: Make a Gradebook With Pandas_](https://realpython.com/pandas-project-gradebook). The code in this folder is used to generate and process the data for the tutorial [_Pandas Project: Make a Gradebook With Pandas_](https://realpython.com/pandas-project-gradebook).
## `generate_data.py` ## `generate_data.py`
The script `generate_data.py` uses the [Faker](https://faker.readthedocs.io/en/master/) library to generate fake student names and NumPy to generate scores for homework, exams, and quizzes. The data are stored as CSV files in the `data` folder. With the seed that is set in the script, the data used in the article can be reproduced. To try out new data, change the seed for the NumPy random number generator. The script `generate_data.py` uses the [Faker](https://faker.readthedocs.io/en/master/) library to generate fake student names and NumPy to generate scores for homework, exams, and quizzes. The data are stored as CSV files in the `data` folder. With the seed that is set in the script, the data used in the article can be reproduced. To try out new data, change the seed for the NumPy random number generator.
## Folders with `gradebook.py` Scripts ## Numbered Python Scripts
In the article, you create a script called `gradebook.py`. For didactic purposes, each of the steps in the development process is broken into a separate folder here. The `gradebook.py` scripts build from one folder to the next, to generate the final copy in the root directory here. The order in the article is: In the article, you create a script called `gradebook.py`. For didactic purposes, each of the steps in the development process is broken into a separate script here. The `.py` scripts build from one example to the next, to generate the final copy called `06-final-gradebook.py`. The order in the article is:
1. `loading-the-data` 1. `01-loading-the-data.py`
2. `merging-dataframes` 2. `02-merging-dataframes.py`
3. `calculating-grades` 3. `03-calculating-grades.py`
4. `grouping-the-data` 4. `04-grouping-the-data.py`
5. `plotting-summary-statistics` 5. `05-plotting-summary-statistics.py`
## Installing Dependencies ## Installing Dependencies
There are two `requirements.txt`-style files in this repository. The `generate_data-reqs.txt` contains the dependencies for the `generate_data.py` script. The `gradebook-reqs.txt` file contains the dependencies for the `gradebook.py` script. There are two `requirements.txt` files in this repository. The `data/requirements.txt` contains the dependencies for the `generate_data.py` script. The `requirements.txt` file in the root folder contains the dependencies for the `gradebook.py` script.
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册