Changed directory struct and added readme

e8f3b6a2 · Jim Anderson · a105c783 · e8f3b6a2 · e8f3b6a2 · e8f3b6a2
20 changed file
--- a/python-interview-problems-parsing-csv/README.md
+++ b/python-interview-problems-parsing-csv/README.md
+# Python Interview Problems – Parsing CSV Files
+
+Corresponding code for ["Python Interview Problems – Parsing CSV Files."](https://realpython.com/python-interview-problems-parsing-csv-python-files/)
+
+The `skeleton_code` directory contains pytest fixtures and module files to get you set up to run pytest. The files contain no tests yet, which pytest reports when you run it:
+
+```console
+$ pytest test_weather_v1.py 
+======================================= test session starts ========================================
+platform linux -- Python 3.7.1, pytest-6.2.1, py-1.10.0, pluggy-0.13.1
+rootdir: /home/jima/coding/materials_realpy/python-interview-problems-parsing-csv/skeleton_code
+collected 0 items                                                                                  
+
+====================================== no tests ran in 0.00s =======================================
+```
+
+The `full_code` directory contains the source files we used to generate the examples in the article. 
+
+Good luck!
\ No newline at end of file
--- a/python-interview-problems-parsing-csv/full_code/csv_parser.py
+++ b/python-interview-problems-parsing-csv/full_code/csv_parser.py
+#!/usr/bin/env python3
+""" Reusable CSV parser for both football and weather data.  """
+import csv
+
+
+def get_next_result(csv_file, func):
+    """ Lazily yield func(row) for each row csv.DictReader reads from csv_file. """
+    rows = csv.DictReader(csv_file)
+    yield from map(func, rows)
--- a/python-interview-problems-parsing-csv/full_code/football_final.py
+++ b/python-interview-problems-parsing-csv/full_code/football_final.py
+#!/usr/bin/env python3
+""" Find Minimum Goal Differential
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with end-of-season football
+    standings for the English Premier League.
+    Determine which team had the smallest goal differential that season.
+    The first line of the CSV file will be column headers:
+
+        Team,Games,Wins,Losses,Draws,Goals For,Goals Against
+
+    Write unit tests with Pytest to test your program.
+"""
+# The shared generator helper lives in csv_parser.py (there is no csv_reader
+# module alongside this file), so import it under its real name.
+import csv_parser
+
+
+def get_name_and_diff(team_stats):
+    """ Return (team name, absolute goal difference) for one CSV row.
+
+        team_stats is a mapping with "Team", "Goals For" and "Goals Against"
+        keys; the two goal values are converted with int().
+    """
+    diff = int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
+    return team_stats["Team"], abs(diff)
+
+
+def get_min_score_difference(filename):
+    """ Return the (team, difference) pair with the smallest difference.
+
+        Opens filename as CSV, converts every row with get_name_and_diff and
+        picks the minimum by the second tuple element. min() raises ValueError
+        when the file has no data rows.
+    """
+    with open(filename, "r", newline="") as csv_data:
+        return min(
+            csv_parser.get_next_result(csv_data, get_name_and_diff),
+            key=lambda item: item[1],
+        )
--- a/python-interview-problems-parsing-csv/full_code/football_pandas.py
+++ b/python-interview-problems-parsing-csv/full_code/football_pandas.py
+import pandas as pd
+
+
+def read_data(csv_file):
+    """ Load the standings CSV into a DataFrame with a goal_difference column.
+
+        "Team", "Goals For" and "Goals Against" are renamed to team_name, goals
+        and goals_allowed; goal_difference is the absolute value of
+        goals - goals_allowed.
+    """
+    new_names = {
+        "Team": "team_name",
+        "Goals For": "goals",
+        "Goals Against": "goals_allowed",
+    }
+    standings = pd.read_csv(csv_file).rename(columns=new_names)
+    margin = abs(standings["goals"] - standings["goals_allowed"])
+    return standings.assign(goal_difference=margin)
+
+
+def get_min_difference(parsed_data):
+    """ Return the smallest value in the goal_difference column. """
+    differences = parsed_data["goal_difference"]
+    return differences.min()
+
+
+def get_team(parsed_data, min_score_difference):
+    """ Return team_name of the first row whose goal_difference matches. """
+    matches = parsed_data.query(f"goal_difference == {min_score_difference}")
+    # Renumber from zero so label 0 is the first matching row.
+    renumbered = matches.reset_index()
+    return renumbered.loc[0, "team_name"]
+
+
+def get_min_score_difference(csv_file):
+    """ Return (team name, difference) for the smallest goal difference. """
+    standings = read_data(csv_file)
+    smallest = get_min_difference(standings)
+    return get_team(standings, smallest), smallest
--- a/python-interview-problems-parsing-csv/full_code/football_v1.py
+++ b/python-interview-problems-parsing-csv/full_code/football_v1.py
+#!/usr/bin/env python3
+""" Find Minimum Goal Differential
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with end-of-season football
+    standings for the English Premier League.
+    Determine which team had the smallest goal differential that season.
+    The first line of the CSV file will be column headers, with each subsequent
+    line showing the data for one team:
+
+        Team,Games,Wins,Losses,Draws,Goals For,Goals Against
+        Arsenal,38,26,9,3,79,36
+
+    The columns labeled "Goals For" and "Goals Against" contain the total
+    number of goals scored for and against each team in that season (so Arsenal
+    scored 79 goals against opponents and had 36 goals scored against them).
+
+    Write a program to read the file, then print the name of the team with the
+    smallest difference in "for" and "against" goals.  Create unit tests with
+    Pytest to test your program.
+"""
+import csv
+
+
+def parse_next_line(csv_file):
+    """ Yield each row of csv_file as produced by csv.DictReader. """
+    yield from csv.DictReader(csv_file)
+
+
+def get_name_and_diff(team_stats):
+    """ Return (team name, absolute goal difference) for one CSV row. """
+    goals_for = int(team_stats["Goals For"])
+    goals_against = int(team_stats["Goals Against"])
+    return team_stats["Team"], abs(goals_for - goals_against)
+
+
+def get_min_score_difference(filename):
+    """ Return (team, difference) for the smallest goal difference in the file.
+
+        The search starts from (None, 10000), which is also what is returned
+        when no row has a difference below 10000.
+    """
+    best = (None, 10000)
+    with open(filename, "r", newline="") as csv_file:
+        for row in parse_next_line(csv_file):
+            candidate = get_name_and_diff(row)
+            # Strict comparison keeps the earliest team on ties.
+            if candidate[1] < best[1]:
+                best = candidate
+    return best
--- a/python-interview-problems-parsing-csv/full_code/football_v2.py
+++ b/python-interview-problems-parsing-csv/full_code/football_v2.py
+#!/usr/bin/env python3
+""" Find Minimum Goal Differential
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with end-of-season football
+    standings for the English Premier League.
+    Determine which team had the smallest goal differential that season.
+    The first line of the CSV file will be column headers:
+
+        Team,Games,Wins,Losses,Draws,Goals For,Goals Against
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv
+
+
+def get_next_name_and_diff(csv_file):
+    """ Yield a (team name, absolute goal difference) pair for each CSV row. """
+    for row in csv.DictReader(csv_file):
+        scored, conceded = int(row["Goals For"]), int(row["Goals Against"])
+        yield row["Team"], abs(scored - conceded)
+
+
+def get_min_score_difference(filename):
+    """ Return the (team, difference) pair with the smallest difference. """
+
+    def by_difference(item):
+        return item[1]
+
+    with open(filename, "r", newline="") as csv_data:
+        pairs = get_next_name_and_diff(csv_data)
+        return min(pairs, key=by_difference)
--- a/python-interview-problems-parsing-csv/full_code/football_v3.py
+++ b/python-interview-problems-parsing-csv/full_code/football_v3.py
+#!/usr/bin/env python3
+""" Find Minimum Goal Differential
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with end-of-season football
+    standings for the English Premier League.
+    Determine which team had the smallest goal differential that season.
+    The first line of the CSV file will be column headers:
+
+        Team,Games,Wins,Losses,Draws,Goals For,Goals Against
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv
+
+
+def get_name_and_diff(team_stats):
+    """ Map one standings row to (team name, absolute goal difference). """
+    scored = int(team_stats["Goals For"])
+    conceded = int(team_stats["Goals Against"])
+    margin = abs(scored - conceded)
+    return team_stats["Team"], margin
+
+
+def get_next_name_and_diff(csv_file):
+    """ Yield get_name_and_diff(row) for each row read by csv.DictReader. """
+    yield from map(get_name_and_diff, csv.DictReader(csv_file))
+
+
+def get_min_score_difference(filename):
+    """ Return the (team, difference) pair with the smallest difference. """
+    with open(filename, "r", newline="") as csv_data:
+        candidates = get_next_name_and_diff(csv_data)
+        closest = min(candidates, key=lambda pair: pair[1])
+    return closest
--- a/python-interview-problems-parsing-csv/full_code/football_v4.py
+++ b/python-interview-problems-parsing-csv/full_code/football_v4.py
+#!/usr/bin/env python3
+""" Find Minimum Goal Differential
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with end-of-season football
+    standings for the English Premier League.
+    Determine which team had the smallest goal differential that season.
+    The first line of the CSV file will be column headers:
+
+        Team,Games,Wins,Losses,Draws,Goals For,Goals Against
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv
+
+
+def get_name_and_diff(team_stats):
+    """ Return the team name paired with its absolute goal difference. """
+    goals = [int(team_stats[column]) for column in ("Goals For", "Goals Against")]
+    return team_stats["Team"], abs(goals[0] - goals[1])
+
+
+def get_next_name_and_diff(csv_file, func):
+    """ Yield func(row) for each row csv.DictReader reads from csv_file. """
+    reader = csv.DictReader(csv_file)
+    yield from (func(row) for row in reader)
+
+
+def get_min_score_difference(filename):
+    """ Return the (team, difference) pair with the smallest difference. """
+
+    def by_difference(item):
+        return item[1]
+
+    with open(filename, "r", newline="") as csv_data:
+        pairs = get_next_name_and_diff(csv_data, get_name_and_diff)
+        return min(pairs, key=by_difference)
--- a/python-interview-problems-parsing-csv/full_code/test_football_pandas.py
+++ b/python-interview-problems-parsing-csv/full_code/test_football_pandas.py
+import pytest
+import football_pandas as fb
+
+
+@pytest.fixture
+def mock_csv_file(tmp_path):
+    """ Write a two-team standings CSV under tmp_path and return its path. """
+    header = "Team,Games,Wins,Losses,Draws,Goals For,Goals Against"
+    teams = (
+        "Liverpool FC, 38, 32, 3, 3, 85, 33",
+        "Norwich City FC, 38, 5, 27, 6, 26, 75",
+    )
+    datafile = tmp_path / "football.csv"
+    datafile.write_text("\n".join((header,) + teams))
+    return str(datafile)
+
+
+def test_read_data(mock_csv_file):
+    """ read_data() keeps both rows and adds one derived column. """
+    # Seven columns come from the original dataset; read_data() adds
+    # goal_difference as the eighth.
+    assert fb.read_data(mock_csv_file).shape == (2, 8)
+
+
+def test_score_difference(mock_csv_file):
+    """ Each row carries the expected team name and goal difference. """
+    df = fb.read_data(mock_csv_file)
+    expected = [("Liverpool FC", 52), ("Norwich City FC", 49)]
+    for index, (team, difference) in enumerate(expected):
+        assert df.team_name[index] == team
+        assert df.goal_difference[index] == difference
+
+
+def test_get_min_diff(mock_csv_file):
+    """ The smallest goal difference in the mock data is 49. """
+    parsed = fb.read_data(mock_csv_file)
+    assert fb.get_min_difference(parsed) == 49
+
+
+def test_get_team_name(mock_csv_file):
+    """ get_team() looks a team up by its goal difference. """
+    df = fb.read_data(mock_csv_file)
+    for difference, team in ((49, "Norwich City FC"), (52, "Liverpool FC")):
+        assert fb.get_team(df, difference) == team
+
+
+def test_get_min_score(mock_csv_file):
+    """ The full pipeline reports Norwich with a difference of 49. """
+    expected = ("Norwich City FC", 49)
+    assert fb.get_min_score_difference(mock_csv_file) == expected
--- a/python-interview-problems-parsing-csv/full_code/test_football_v1.py
+++ b/python-interview-problems-parsing-csv/full_code/test_football_v1.py
+#!/usr/bin/env python3
+""" Pytest functions for CSV Football problem """
+import pytest
+import football_v1 as fb
+
+
+@pytest.fixture
+def mock_csv_data():
+    """ Return the header line and two team lines as a list of strings. """
+    header = "Team,Games,Wins,Losses,Draws,Goals For,Goals Against"
+    liverpool = "Liverpool FC, 38, 32, 3, 3, 85, 33"
+    norwich = "Norwich City FC, 38, 5, 27, 6, 26, 75"
+    return [header, liverpool, norwich]
+
+
+@pytest.fixture
+def mock_csv_file(tmp_path, mock_csv_data):
+    """ Write the mock lines to football.csv in tmp_path; return its path. """
+    contents = "\n".join(mock_csv_data)
+    target = tmp_path / "football.csv"
+    target.write_text(contents)
+    return str(target)
+
+
+def test_get_min_score(mock_csv_file):
+    """ Norwich has the smallest goal difference (49) in the mock file. """
+    result = fb.get_min_score_difference(mock_csv_file)
+    assert result == ("Norwich City FC", 49)
+
+
+def test_parse_next_line(mock_csv_data):
+    """ Two data rows are parsed, each with seven fields. """
+    all_lines = list(fb.parse_next_line(mock_csv_data))
+    assert len(all_lines) == 2
+    assert all(len(line) == 7 for line in all_lines)
+
+
+def test_get_score_difference(mock_csv_data):
+    """ Rows convert to the expected (team, difference) pairs, in order. """
+    reader = fb.parse_next_line(mock_csv_data)
+    for expected in (("Liverpool FC", 52), ("Norwich City FC", 49)):
+        assert fb.get_name_and_diff(next(reader)) == expected
--- a/python-interview-problems-parsing-csv/full_code/test_football_v2.py
+++ b/python-interview-problems-parsing-csv/full_code/test_football_v2.py
+#!/usr/bin/env python3
+""" Pytest functions for CSV Football problem """
+import pytest
+import football_v2 as fb
+
+
+@pytest.fixture
+def mock_csv_data():
+    """ Return the header line and two team lines as a list of strings. """
+    lines = ["Team,Games,Wins,Losses,Draws,Goals For,Goals Against"]
+    lines.append("Liverpool FC, 38, 32, 3, 3, 85, 33")
+    lines.append("Norwich City FC, 38, 5, 27, 6, 26, 75")
+    return lines
+
+
+@pytest.fixture
+def mock_csv_file(tmp_path, mock_csv_data):
+    """ Write the mock lines to football.csv in tmp_path; return its path. """
+    csv_path = tmp_path / "football.csv"
+    text = "\n".join(mock_csv_data)
+    csv_path.write_text(text)
+    return str(csv_path)
+
+
+def test_get_min_score(mock_csv_file):
+    """ Norwich has the smallest goal difference (49) in the mock file. """
+    team, difference = fb.get_min_score_difference(mock_csv_file)
+    assert (team, difference) == ("Norwich City FC", 49)
+
+
+def test_get_score_difference(mock_csv_data):
+    """ The generator yields both pairs in order and then stops. """
+    reader = fb.get_next_name_and_diff(mock_csv_data)
+    for expected in (("Liverpool FC", 52), ("Norwich City FC", 49)):
+        assert next(reader) == expected
+    with pytest.raises(StopIteration):
+        next(reader)
--- a/python-interview-problems-parsing-csv/full_code/test_weather_final.py
+++ b/python-interview-problems-parsing-csv/full_code/test_weather_final.py
+#!/usr/bin/env python3
+""" Find the day with the highest average temperature.
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with a month of weather data,
+    one day per line.
+
+    Determine which day had the highest average temperature where the average
+    temperature is the average of the day's high and low temperatures. This is
+    not normally how average temperature is computed, but it will work for our
+    demonstration.
+
+    The first line of the CSV file will be column headers:
+
+        Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
+
+    The day number, max temperature, and min temperature are the first three
+    columns.
+
+    Write unit tests with Pytest to test your program.
+"""
+import pytest
+import weather_final as wthr
+
+
+@pytest.fixture
+def mock_csv_data():
+    """ Return the header line and two days of weather data as strings. """
+    header = (
+        "Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP"
+    )
+    day_one = "1,88,59,74,53.8,0,280,9.6,270,17,1.6,93,23,1004.5"
+    day_two = "2,79,63,71,46.5,0,330,8.7,340,23,3.3,70,28,1004.5"
+    return [header, day_one, day_two]
+
+
+@pytest.fixture
+def mock_csv_file(tmp_path, mock_csv_data):
+    """ Write the mock lines to weather.csv in tmp_path; return its path. """
+    contents = "\n".join(mock_csv_data)
+    target = tmp_path / "weather.csv"
+    target.write_text(contents)
+    return str(target)
+
+
+def test_get_max_avg(mock_csv_file):
+    """ Day 1 has the highest average of max and min temperature. """
+    # Day 1: (88 + 59) / 2 = 73.5; day 2: (79 + 63) / 2 = 71.
+    assert wthr.get_max_avg(mock_csv_file) == (1, 73.5)
--- a/python-interview-problems-parsing-csv/full_code/test_weather_v1.py
+++ b/python-interview-problems-parsing-csv/full_code/test_weather_v1.py
+#!/usr/bin/env python3
+""" Find the day with the highest average temperature.
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with a month of weather data,
+    one day per line.
+
+    Determine which day had the highest average temperature where the average
+    temperature is the average of the day's high and low temperatures. This is
+    not normally how average temperature is computed, but it will work for our
+    demonstration.
+
+    The first line of the CSV file will be column headers:
+
+        Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
+
+    The day number, max temperature, and min temperature are the first three
+    columns.
+
+    Write unit tests with Pytest to test your program.
+"""
+import pytest
+import weather_v1 as wthr
+
+
+@pytest.fixture
+def mock_csv_data():
+    """ Return the header line and two days of weather data as strings. """
+    lines = [
+        "Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP"
+    ]
+    lines.append("1,88,59,74,53.8,0,280,9.6,270,17,1.6,93,23,1004.5")
+    lines.append("2,79,63,71,46.5,0,330,8.7,340,23,3.3,70,28,1004.5")
+    return lines
+
+
+@pytest.fixture
+def mock_csv_file(tmp_path, mock_csv_data):
+    """ Write the mock lines to weather.csv in tmp_path; return its path. """
+    csv_path = tmp_path / "weather.csv"
+    text = "\n".join(mock_csv_data)
+    csv_path.write_text(text)
+    return str(csv_path)
+
+
+def test_no_lines():
+    """ Empty input must yield nothing at all. """
+    results = list(wthr.get_next_day_and_avg([]))
+    assert not results
+
+
+def test_trailing_blank_lines(mock_csv_data):
+    """ A blank line after the data does not change the two results. """
+    mock_csv_data.append("")
+    results = list(wthr.get_next_day_and_avg(mock_csv_data))
+    assert len(results) == 2
+    assert all(len(result) == 2 for result in results)
+
+
+def test_mid_blank_lines(mock_csv_data):
+    """ A blank line right after the header does not change the results. """
+    mock_csv_data.insert(1, "")
+    results = list(wthr.get_next_day_and_avg(mock_csv_data))
+    assert len(results) == 2
+    assert all(len(result) == 2 for result in results)
+
+
+def test_get_max_avg(mock_csv_file):
+    """ Day 1, with an average of 73.5, is the warmest day in the mock file. """
+    day_number, avg = wthr.get_max_avg(mock_csv_file)
+    assert (day_number, avg) == (1, 73.5)
+
+
+def test_get_next_day_and_avg(mock_csv_data):
+    """ The generator yields both days in order and then stops. """
+    reader = wthr.get_next_day_and_avg(mock_csv_data)
+    for expected in ((1, 73.5), (2, 71)):
+        assert next(reader) == expected
+    with pytest.raises(StopIteration):
+        next(reader)
--- a/python-interview-problems-parsing-csv/full_code/weather_final.py
+++ b/python-interview-problems-parsing-csv/full_code/weather_final.py
+#!/usr/bin/env python3
+""" Find the day with the highest average temperature.
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with a month of weather data,
+    one day per line.
+
+    Determine which day had the highest average temperature where the average
+    temperature is the average of the day's high and low temperatures. This is
+    not normally how average temperature is computed, but it will work for our
+    demonstration.
+
+    The first line of the CSV file will be column headers:
+
+        Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
+
+    The day number, max temperature, and min temperature are the first three
+    columns.
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv_parser
+
+
+def get_name_and_avg(day_stats):
+    """ Return (day number, mean of the "MxT" and "MnT" values) for one row. """
+    day = int(day_stats["Day"])
+    high = int(day_stats["MxT"])
+    low = int(day_stats["MnT"])
+    return day, (high + low) / 2
+
+
+def get_max_avg(filename):
+    """ Return the (day number, average) pair with the highest average. """
+
+    def by_average(item):
+        return item[1]
+
+    with open(filename, "r", newline="") as csv_file:
+        pairs = csv_parser.get_next_result(csv_file, get_name_and_avg)
+        return max(pairs, key=by_average)
--- a/python-interview-problems-parsing-csv/full_code/weather_v1.py
+++ b/python-interview-problems-parsing-csv/full_code/weather_v1.py
+#!/usr/bin/env python3
+""" Find the day with the highest average temperature.
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with a month of weather data,
+    one day per line.
+
+    Determine which day had the highest average temperature where the average
+    temperature is the average of the day's high and low temperatures. This is
+    not normally how average temperature is computed, but it will work for our
+    demonstration.
+
+    The first line of the CSV file will be column headers:
+
+        Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
+
+    The day number, max temperature, and min temperature are the first three
+    columns.
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv
+
+
+def get_next_day_and_avg(csv_file):
+    """ Yield (day number, mean of "MxT" and "MnT") for each CSV row. """
+    for row in csv.DictReader(csv_file):
+        day = int(row["Day"])
+        high, low = int(row["MxT"]), int(row["MnT"])
+        yield day, (high + low) / 2
+
+
+def get_max_avg(filename):
+    """ Return the (day number, average) pair with the highest average. """
+    with open(filename, "r", newline="") as csv_file:
+        daily = get_next_day_and_avg(csv_file)
+        warmest = max(daily, key=lambda pair: pair[1])
+    return warmest
--- a/python-interview-problems-parsing-csv/full_code/weather_v2.py
+++ b/python-interview-problems-parsing-csv/full_code/weather_v2.py
+#!/usr/bin/env python3
+""" Find the day with the highest average temperature.
+    Write a program that takes a filename on the command line and processes the
+    CSV contents. The contents will be a CSV file with a month of weather data,
+    one day per line.
+
+    Determine which day had the highest average temperature where the average
+    temperature is the average of the day's high and low temperatures. This is
+    not normally how average temperature is computed, but it will work for our
+    demonstration.
+
+    The first line of the CSV file will be column headers:
+
+        Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
+
+    The day number, max temperature, and min temperature are the first three
+    columns.
+
+    Write unit tests with Pytest to test your program.
+"""
+import csv
+
+
+def get_day_and_avg(day_stats):
+    """ Map one weather row to (day number, mean of "MxT" and "MnT"). """
+    day = int(day_stats["Day"])
+    total = int(day_stats["MxT"]) + int(day_stats["MnT"])
+    return day, total / 2
+
+
+def get_next_day_and_avg(csv_file, func):
+    """ Yield func(row) for each row csv.DictReader reads from csv_file. """
+    reader = csv.DictReader(csv_file)
+    yield from (func(row) for row in reader)
+
+
+def get_max_avg(filename):
+    """ Return the (day number, average) pair with the highest average. """
+
+    def by_average(item):
+        return item[1]
+
+    with open(filename, "r", newline="") as csv_file:
+        pairs = get_next_day_and_avg(csv_file, get_day_and_avg)
+        return max(pairs, key=by_average)
--- a/python-interview-problems-parsing-csv/football_v1.py
+++ b/python-interview-problems-parsing-csv/football_v1.py
--- a/python-interview-problems-parsing-csv/test_football_v1.py
+++ b/python-interview-problems-parsing-csv/test_football_v1.py
--- a/python-interview-problems-parsing-csv/test_weather_v1.py
+++ b/python-interview-problems-parsing-csv/test_weather_v1.py
--- a/python-interview-problems-parsing-csv/weather_v1.py
+++ b/python-interview-problems-parsing-csv/weather_v1.py