提交 e8f3b6a2 编写于 作者: J Jim Anderson

Changed directory struct and added readme

上级 a105c783
# Python Interview Problems – Parsing CSV Files
Corresponding code for ["Python Interview Problems – Parsing CSV Files."](https://realpython.com/python-interview-problems-parsing-csv-python-files/)
The `skeleton_code` directory contains pytest fixtures and module files to get you set up to run pytest. There are no tests in the files, which pytest tells you when you run it:
```console
$ pytest test_weather_v1.py
======================================= test session starts ========================================
platform linux -- Python 3.7.1, pytest-6.2.1, py-1.10.0, pluggy-0.13.1
rootdir: /home/jima/coding/materials_realpy/python-interview-problems-parsing-csv/skeleton_code
collected 0 items
====================================== no tests ran in 0.00s =======================================
```
The `full_code` directory contains the source files we used to generate the examples in the article.
Good luck!
\ No newline at end of file
#!/usr/bin/env python3
""" Reusable CSV parser for both football and weather data. """
import csv
def get_next_result(csv_file, func):
    """Lazily apply ``func`` to every data row of a CSV source.

    Args:
        csv_file: Iterable of CSV text lines (for example an open text
            file). The first line supplies the column headers.
        func: Callable invoked with one row mapping (header -> string
            value) at a time.

    Yields:
        The value returned by ``func`` for each row, in input order.
    """
    yield from map(func, csv.DictReader(csv_file))
#!/usr/bin/env python3
""" Find Minimum Goal Differential
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with end-of-season football
standings for the English Premier League.
Determine which team had the smallest goal differential that season.
The first line of the CSV file will be column headers:
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
Write unit tests with Pytest to test your program.
"""
import csv_reader
def get_name_and_diff(team_stats):
    """Return a team's name together with its absolute goal differential.

    Args:
        team_stats: Mapping with the keys "Team", "Goals For" and
            "Goals Against"; the goal values must be convertible by int().

    Returns:
        Tuple ``(team name, abs(goals for - goals against))``.
    """
    goals_for = int(team_stats["Goals For"])
    goals_against = int(team_stats["Goals Against"])
    return team_stats["Team"], abs(goals_for - goals_against)
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal differential.

    Args:
        filename: Path of the standings CSV file to read.

    Returns:
        The ``(team name, difference)`` result with the lowest difference.
        min() raises ValueError when no results are produced.
    """

    def by_difference(result):
        # Results are (name, difference) pairs; compare on the difference.
        return result[1]

    with open(filename, "r", newline="") as csv_data:
        # NOTE(review): csv_reader.get_next_result presumably yields one
        # get_name_and_diff() result per CSV row -- confirm in csv_reader.
        results = csv_reader.get_next_result(csv_data, get_name_and_diff)
        return min(results, key=by_difference)
import pandas as pd
def read_data(csv_file):
    """Load the standings CSV into a DataFrame with a goal-difference column.

    The "Team", "Goals For" and "Goals Against" columns are renamed to
    ``team_name``, ``goals`` and ``goals_allowed``; a ``goal_difference``
    column holding the absolute value of ``goals - goals_allowed`` is added.

    Args:
        csv_file: Anything accepted by ``pandas.read_csv``.

    Returns:
        The resulting DataFrame.
    """
    new_names = {
        "Team": "team_name",
        "Goals For": "goals",
        "Goals Against": "goals_allowed",
    }
    standings = pd.read_csv(csv_file).rename(columns=new_names)
    difference = abs(standings.goals - standings.goals_allowed)
    return standings.assign(goal_difference=difference)
def get_min_difference(parsed_data):
    """Return the smallest value found in the ``goal_difference`` column.

    Args:
        parsed_data: Object exposing a ``goal_difference`` attribute with a
            ``min()`` method (such as the DataFrame built by read_data()).
    """
    differences = parsed_data.goal_difference
    return differences.min()
def get_team(parsed_data, min_score_difference):
    """Return the name of the first team with the given goal difference.

    Args:
        parsed_data: DataFrame with ``goal_difference`` and ``team_name``
            columns.
        min_score_difference: Value to match against ``goal_difference``;
            it is interpolated into the query expression.

    Returns:
        The ``team_name`` of the first matching row.
    """
    matches = parsed_data.query(f"goal_difference == {min_score_difference}")
    # Renumber from zero so that label 0 is the first matching row.
    renumbered = matches.reset_index()
    return renumbered.loc[0, "team_name"]
def get_min_score_difference(csv_file):
    """Return ``(team name, difference)`` for the smallest goal difference.

    Args:
        csv_file: CSV source handed to read_data().

    Returns:
        Tuple of the team chosen by get_team() and the minimum difference
        reported by get_min_difference().
    """
    standings = read_data(csv_file)
    smallest = get_min_difference(standings)
    return get_team(standings, smallest), smallest
#!/usr/bin/env python3
""" Find Minimum Goal Differential
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with end-of-season football
standings for the English Premier League.
Determine which team had the smallest goal differential that season.
The first line of the CSV file will be column headers, with each subsequent
line showing the data for one team:
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
Arsenal,38,26,9,3,79,36
The columns labeled "Goals For" and "Goals Against" contain the total number of
goals scored for and against each team in that season (so Arsenal scored 79
goals against opponents and had 36 goals scored against them).
Write a program to read the file, then print the name of the team with the
smallest difference in "for" and "against" goals. Create unit tests with
Pytest to test your program.
"""
import csv
def parse_next_line(csv_file):
    """Yield each data row of a CSV source as a header-keyed mapping.

    Args:
        csv_file: Iterable of CSV text lines; the first line is the header.

    Yields:
        One row mapping per data line, as produced by ``csv.DictReader``.
    """
    yield from csv.DictReader(csv_file)
def get_name_and_diff(team_stats):
    """Compute the absolute goal differential for one team.

    Args:
        team_stats: Row mapping containing "Team", "Goals For" and
            "Goals Against"; goal values must be int()-convertible.

    Returns:
        ``(team name, absolute difference between goals for and against)``.
    """
    scored, conceded = (
        int(team_stats[column]) for column in ("Goals For", "Goals Against")
    )
    return team_stats["Team"], abs(scored - conceded)
def get_min_score_difference(filename):
    """Find the team with the smallest absolute goal differential.

    Args:
        filename: Path of the standings CSV file to read.

    Returns:
        Tuple ``(team name, difference)`` for the row with the lowest
        difference; when several rows tie, the first one read wins.
        ``(None, None)`` is returned when the file has no data rows.
    """
    min_team = None
    # None marks "nothing seen yet", so no arbitrary upper bound is imposed
    # on the differences that can be selected.
    min_diff = None
    with open(filename, "r", newline="") as csv_file:
        for line in parse_next_line(csv_file):
            team, diff = get_name_and_diff(line)
            # Strict "<" keeps the earliest row among equal differences.
            if min_diff is None or diff < min_diff:
                min_team, min_diff = team, diff
    return min_team, min_diff
#!/usr/bin/env python3
""" Find Minimum Goal Differential
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with end-of-season football
standings for the English Premier League.
Determine which team had the smallest goal differential that season.
The first line of the CSV file will be column headers:
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
Write unit tests with Pytest to test your program.
"""
import csv
def get_next_name_and_diff(csv_file):
    """Yield ``(team name, absolute goal differential)`` for each CSV row.

    Args:
        csv_file: Iterable of CSV text lines whose header includes "Team",
            "Goals For" and "Goals Against".

    Yields:
        One ``(name, difference)`` tuple per data row, in input order.
    """
    reader = csv.DictReader(csv_file)
    for row in reader:
        goals_for = int(row["Goals For"])
        goals_against = int(row["Goals Against"])
        yield row["Team"], abs(goals_for - goals_against)
def get_min_score_difference(filename):
    """Return the ``(team name, difference)`` pair with the lowest difference.

    Args:
        filename: Path of the standings CSV file to read.

    Returns:
        The smallest result from get_next_name_and_diff(), compared on the
        difference. min() raises ValueError when there are no results.
    """
    with open(filename, "r", newline="") as csv_data:
        results = get_next_name_and_diff(csv_data)
        return min(results, key=lambda result: result[1])
#!/usr/bin/env python3
""" Find Minimum Goal Differential
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with end-of-season football
standings for the English Premier League.
Determine which team had the smallest goal differential that season.
The first line of the CSV file will be column headers:
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
Write unit tests with Pytest to test your program.
"""
import csv
def get_name_and_diff(team_stats):
    """Pair a team's name with its absolute goal differential.

    Args:
        team_stats: Row mapping with "Team", "Goals For" and
            "Goals Against" entries; goal values go through int().

    Returns:
        ``(team name, abs(goals for - goals against))``.
    """
    goals_for = int(team_stats["Goals For"])
    difference = goals_for - int(team_stats["Goals Against"])
    name = team_stats["Team"]
    return name, abs(difference)
def get_next_name_and_diff(csv_file):
    """Yield the get_name_and_diff() result for every CSV data row.

    Args:
        csv_file: Iterable of CSV text lines; the first line is the header.

    Yields:
        One get_name_and_diff() result per row, in input order.
    """
    yield from (get_name_and_diff(row) for row in csv.DictReader(csv_file))
def get_min_score_difference(filename):
    """Return the result with the smallest goal difference in the file.

    Args:
        filename: Path of the standings CSV file to read.

    Returns:
        The lowest ``(team name, difference)`` item yielded by
        get_next_name_and_diff(), compared on its second element.
        min() raises ValueError when nothing is yielded.
    """

    def difference_of(item):
        return item[1]

    with open(filename, "r", newline="") as csv_data:
        return min(get_next_name_and_diff(csv_data), key=difference_of)
#!/usr/bin/env python3
""" Find Minimum Goal Differential
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with end-of-season football
standings for the English Premier League.
Determine which team had the smallest goal differential that season.
The first line of the CSV file will be column headers:
Team,Games,Wins,Losses,Draws,Goals For,Goals Against
Write unit tests with Pytest to test your program.
"""
import csv
def get_name_and_diff(team_stats):
    """Return ``(team name, absolute goal differential)`` for one row.

    Args:
        team_stats: Row mapping holding "Team", "Goals For" and
            "Goals Against"; the goal values are converted with int().
    """
    goal_gap = abs(
        int(team_stats["Goals For"]) - int(team_stats["Goals Against"])
    )
    return team_stats["Team"], goal_gap
def get_next_name_and_diff(csv_file, func):
    """Yield ``func(row)`` for every data row of a CSV source.

    Args:
        csv_file: Iterable of CSV text lines; the first line is the header.
        func: Callable applied to each row mapping.

    Yields:
        The return value of ``func`` for each row, in input order.
    """
    yield from map(func, csv.DictReader(csv_file))
def get_min_score_difference(filename):
    """Return the ``(team name, difference)`` pair with the lowest difference.

    Args:
        filename: Path of the standings CSV file to read.

    Returns:
        The smallest get_name_and_diff() result over all rows, compared on
        the difference. min() raises ValueError when there are no rows.
    """
    with open(filename, "r", newline="") as csv_data:
        results = get_next_name_and_diff(csv_data, get_name_and_diff)
        return min(results, key=lambda result: result[1])
import pytest
import football_pandas as fb
@pytest.fixture
def mock_csv_file(tmp_path):
    """Write a two-team standings CSV under ``tmp_path`` and return its path.

    Returns:
        The file path as a string.
    """
    header = "Team,Games,Wins,Losses,Draws,Goals For,Goals Against"
    teams = (
        "Liverpool FC, 38, 32, 3, 3, 85, 33",
        "Norwich City FC, 38, 5, 27, 6, 26, 75",
    )
    csv_path = tmp_path / "football.csv"
    csv_path.write_text("\n".join((header, *teams)))
    return str(csv_path)
def test_read_data(mock_csv_file):
    """read_data() returns one row per team and eight columns."""
    # Seven columns come from the original dataset; read_data() adds
    # goal_difference as the eighth.
    assert fb.read_data(mock_csv_file).shape == (2, 8)
def test_score_difference(mock_csv_file):
    """Each mock team gets the expected name and goal difference."""
    df = fb.read_data(mock_csv_file)
    expected = [("Liverpool FC", 52), ("Norwich City FC", 49)]
    for row, (name, difference) in enumerate(expected):
        assert df.team_name[row] == name
        assert df.goal_difference[row] == difference
def test_get_min_diff(mock_csv_file):
    """get_min_difference() reports 49 for the mock data."""
    standings = fb.read_data(mock_csv_file)
    assert fb.get_min_difference(standings) == 49
def test_get_team_name(mock_csv_file):
    """get_team() maps each goal difference back to its team."""
    standings = fb.read_data(mock_csv_file)
    for difference, name in ((49, "Norwich City FC"), (52, "Liverpool FC")):
        assert fb.get_team(standings, difference) == name
def test_get_min_score(mock_csv_file):
    """The end-to-end lookup returns Norwich City FC with a difference of 49."""
    expected = ("Norwich City FC", 49)
    assert fb.get_min_score_difference(mock_csv_file) == expected
#!/usr/bin/env python3
""" Pytest functions for CSV Football problem """
import pytest
import football_v1 as fb
@pytest.fixture
def mock_csv_data():
    """Return CSV lines for two teams: a header followed by two data rows."""
    header = "Team,Games,Wins,Losses,Draws,Goals For,Goals Against"
    liverpool = "Liverpool FC, 38, 32, 3, 3, 85, 33"
    norwich = "Norwich City FC, 38, 5, 27, 6, 26, 75"
    return [header, liverpool, norwich]
@pytest.fixture
def mock_csv_file(tmp_path, mock_csv_data):
    """Write the mock CSV lines to ``football.csv`` and return its path string."""
    csv_path = tmp_path / "football.csv"
    contents = "\n".join(mock_csv_data)
    csv_path.write_text(contents)
    return str(csv_path)
def test_get_min_score(mock_csv_file):
    """The smallest goal difference in the mock file belongs to Norwich (49)."""
    result = fb.get_min_score_difference(mock_csv_file)
    assert result == ("Norwich City FC", 49)
def test_parse_next_line(mock_csv_data):
    """parse_next_line() yields two rows, each with seven fields."""
    parsed = list(fb.parse_next_line(mock_csv_data))
    assert len(parsed) == 2
    assert all(len(row) == 7 for row in parsed)
def test_get_score_difference(mock_csv_data):
    """get_name_and_diff() gives the expected pair for each parsed row."""
    rows = fb.parse_next_line(mock_csv_data)
    for expected in (("Liverpool FC", 52), ("Norwich City FC", 49)):
        assert fb.get_name_and_diff(next(rows)) == expected
#!/usr/bin/env python3
""" Pytest functions for CSV Football problem """
import pytest
import football_v2 as fb
@pytest.fixture
def mock_csv_data():
    """Provide a header line plus two team rows as a list of CSV strings."""
    lines = ["Team,Games,Wins,Losses,Draws,Goals For,Goals Against"]
    lines.append("Liverpool FC, 38, 32, 3, 3, 85, 33")
    lines.append("Norwich City FC, 38, 5, 27, 6, 26, 75")
    return lines
@pytest.fixture
def mock_csv_file(tmp_path, mock_csv_data):
    """Save the mock CSV lines as ``football.csv`` in ``tmp_path``.

    Returns:
        The path of the written file, as a string.
    """
    target = tmp_path / "football.csv"
    target.write_text("\n".join(mock_csv_data))
    return str(target)
def test_get_min_score(mock_csv_file):
    """get_min_score_difference() picks Norwich City FC with a difference of 49."""
    team, difference = "Norwich City FC", 49
    assert fb.get_min_score_difference(mock_csv_file) == (team, difference)
def test_get_score_difference(mock_csv_data):
    """The generator yields both teams in order and then stops."""
    results = fb.get_next_name_and_diff(mock_csv_data)
    for expected in (("Liverpool FC", 52), ("Norwich City FC", 49)):
        assert next(results) == expected
    # Both data rows are consumed, so a further next() must raise.
    with pytest.raises(StopIteration):
        next(results)
#!/usr/bin/env python3
""" Find the day with the highest average temperature.
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with a month of weather data,
one day per line.
Determine which day had the highest average temperature where the average
temperature is the average of the day's high and low temperatures. This is
not normally how average temperature is computed, but it will work for our
demonstration.
The first line of the CSV file will be column headers:
Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
The day number, max temperature, and min temperature are the first three
columns.
Write unit tests with Pytest to test your program.
"""
import pytest
import weather_final as wthr
@pytest.fixture
def mock_csv_data():
    """Return weather CSV lines: a header followed by two days of data."""
    header = (
        "Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP"
    )
    day_one = "1,88,59,74,53.8,0,280,9.6,270,17,1.6,93,23,1004.5"
    day_two = "2,79,63,71,46.5,0,330,8.7,340,23,3.3,70,28,1004.5"
    return [header, day_one, day_two]
@pytest.fixture
def mock_csv_file(tmp_path, mock_csv_data):
    """Write the mock weather lines to ``weather.csv`` and return its path string."""
    csv_path = tmp_path / "weather.csv"
    contents = "\n".join(mock_csv_data)
    csv_path.write_text(contents)
    return str(csv_path)
def test_get_max_avg(mock_csv_file):
    """get_max_avg() reports day 1 with an average of 73.5 for the mock data."""
    # Call once and assert on the unpacked values; unpacking also checks
    # that the result has exactly two elements.
    day_number, avg = wthr.get_max_avg(mock_csv_file)
    assert (day_number, avg) == (1, 73.5)
#!/usr/bin/env python3
""" Find the day with the highest average temperature.
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with a month of weather data,
one day per line.
Determine which day had the highest average temperature where the average
temperature is the average of the day's high and low temperatures. This is
not normally how average temperature is computed, but it will work for our
demonstration.
The first line of the CSV file will be column headers:
Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
The day number, max temperature, and min temperature are the first three
columns.
Write unit tests with Pytest to test your program.
"""
import pytest
import weather_v1 as wthr
@pytest.fixture
def mock_csv_data():
    """Provide a weather header line plus two day rows as a list of strings."""
    lines = [
        "Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP"
    ]
    lines.append("1,88,59,74,53.8,0,280,9.6,270,17,1.6,93,23,1004.5")
    lines.append("2,79,63,71,46.5,0,330,8.7,340,23,3.3,70,28,1004.5")
    return lines
@pytest.fixture
def mock_csv_file(tmp_path, mock_csv_data):
    """Save the mock weather lines as ``weather.csv`` in ``tmp_path``.

    Returns:
        The path of the written file, as a string.
    """
    target = tmp_path / "weather.csv"
    target.write_text("\n".join(mock_csv_data))
    return str(target)
def test_no_lines():
    """An empty input must not produce any results."""
    produced = list(wthr.get_next_day_and_avg([]))
    assert not produced
def test_trailing_blank_lines(mock_csv_data):
    """A blank line after the data still leaves exactly two 2-item results."""
    mock_csv_data.append("")
    results = list(wthr.get_next_day_and_avg(mock_csv_data))
    assert len(results) == 2
    assert all(len(result) == 2 for result in results)
def test_mid_blank_lines(mock_csv_data):
    """A blank line right after the header still leaves two 2-item results."""
    mock_csv_data.insert(1, "")
    results = list(wthr.get_next_day_and_avg(mock_csv_data))
    assert len(results) == 2
    assert all(len(result) == 2 for result in results)
def test_get_max_avg(mock_csv_file):
    """get_max_avg() returns day 1 with an average of 73.5 for the mock file."""
    expected = (1, 73.5)
    assert wthr.get_max_avg(mock_csv_file) == expected
def test_get_next_day_and_avg(mock_csv_data):
    """The generator yields both days in order and then stops."""
    days = wthr.get_next_day_and_avg(mock_csv_data)
    for expected in ((1, 73.5), (2, 71)):
        assert next(days) == expected
    # Both data rows are consumed, so a further next() must raise.
    with pytest.raises(StopIteration):
        next(days)
#!/usr/bin/env python3
""" Find the day with the highest average temperature.
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with a month of weather data,
one day per line.
Determine which day had the highest average temperature where the average
temperature is the average of the day's high and low temperatures. This is
not normally how average temperature is computed, but it will work for our
demonstration.
The first line of the CSV file will be column headers:
Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
The day number, max temperature, and min temperature are the first three
columns.
Write unit tests with Pytest to test your program.
"""
import csv_parser
def get_name_and_avg(day_stats):
    """Return the day number and the mean of that day's high and low.

    Args:
        day_stats: Row mapping with "Day", "MxT" and "MnT" entries, each
            convertible by int().

    Returns:
        ``(day number, (MxT + MnT) / 2)``.
    """
    day_number = int(day_stats["Day"])
    temperatures = [int(day_stats[column]) for column in ("MxT", "MnT")]
    return day_number, sum(temperatures) / 2
def get_max_avg(filename):
    """Find the day with the highest average temperature.

    Args:
        filename: Path of the weather CSV file to read.

    Returns:
        The ``(day number, average)`` result with the largest average.
        max() raises ValueError when no results are produced.
    """

    def by_average(result):
        # Results are (day, average) pairs; compare on the average.
        return result[1]

    with open(filename, "r", newline="") as csv_file:
        # NOTE(review): csv_parser.get_next_result presumably yields one
        # get_name_and_avg() result per CSV row -- confirm in csv_parser.
        results = csv_parser.get_next_result(csv_file, get_name_and_avg)
        return max(results, key=by_average)
#!/usr/bin/env python3
""" Find the day with the highest average temperature.
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with a month of weather data,
one day per line.
Determine which day had the highest average temperature where the average
temperature is the average of the day's high and low temperatures. This is
not normally how average temperature is computed, but it will work for our
demonstration.
The first line of the CSV file will be column headers:
Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
The day number, max temperature, and min temperature are the first three
columns.
Write unit tests with Pytest to test your program.
"""
import csv
def get_next_day_and_avg(csv_file):
    """Yield ``(day number, average temperature)`` for each CSV row.

    The average is the mean of the row's "MxT" and "MnT" values.

    Args:
        csv_file: Iterable of CSV text lines whose header includes "Day",
            "MxT" and "MnT".

    Yields:
        One ``(day, average)`` tuple per data row, in input order.
    """
    reader = csv.DictReader(csv_file)
    for row in reader:
        day = int(row["Day"])
        high = int(row["MxT"])
        low = int(row["MnT"])
        yield day, (high + low) / 2
def get_max_avg(filename):
    """Return the ``(day number, average)`` pair with the highest average.

    Args:
        filename: Path of the weather CSV file to read.

    Returns:
        The largest result from get_next_day_and_avg(), compared on the
        average. max() raises ValueError when there are no results.
    """
    with open(filename, "r", newline="") as csv_file:
        results = get_next_day_and_avg(csv_file)
        return max(results, key=lambda result: result[1])
#!/usr/bin/env python3
""" Find the day with the highest average temperature.
Write a program that takes a filename on the command line and processes the
CSV contents. The contents will be a CSV file with a month of weather data,
one day per line.
Determine which day had the highest average temperature where the average
temperature is the average of the day's high and low temperatures. This is
not normally how average temperature is computed, but it will work for our
demonstration.
The first line of the CSV file will be column headers:
Day,MxT,MnT,AvT,AvDP,1HrP TPcn,PDir,AvSp,Dir,MxS,SkyC,MxR,Mn,R AvSLP
The day number, max temperature, and min temperature are the first three
columns.
Write unit tests with Pytest to test your program.
"""
import csv
def get_day_and_avg(day_stats):
    """Return ``(day number, mean of the day's high and low temperature)``.

    Args:
        day_stats: Row mapping with "Day", "MxT" and "MnT" entries, each
            convertible by int().
    """
    day_number = int(day_stats["Day"])
    high = int(day_stats["MxT"])
    low = int(day_stats["MnT"])
    return day_number, (high + low) / 2
def get_next_day_and_avg(csv_file, func):
    """Yield ``func(row)`` for every data row of a CSV source.

    Args:
        csv_file: Iterable of CSV text lines; the first line is the header.
        func: Callable applied to each row mapping.

    Yields:
        The return value of ``func`` for each row, in input order.
    """
    yield from map(func, csv.DictReader(csv_file))
def get_max_avg(filename):
    """Return the ``(day number, average)`` pair with the highest average.

    Args:
        filename: Path of the weather CSV file to read.

    Returns:
        The largest get_day_and_avg() result over all rows, compared on
        the average. max() raises ValueError when there are no rows.
    """

    def average_of(item):
        return item[1]

    with open(filename, "r", newline="") as csv_file:
        results = get_next_day_and_avg(csv_file, get_day_and_avg)
        return max(results, key=average_of)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册