提交 c0904135 编写于 作者: D Doug Farrell

Reworking the code to match the updated article text

上级 68c47a7a
# Example 1
This example uses the raw temp_data.csv file
to get data into the program and run
various Python functions on it.
In particular getting the average temperature
for a date across all the samples, and getting the
average temperature for every week for the entire
year and returning it sorted.
\ No newline at end of file
"""
This program gathers information from the temp_data.csv file about temperature
This is the example 4 program file
"""
import csv
from pkg_resources import resource_filename
from datetime import datetime
from datetime import timedelta
from typing import List, Dict
from collections import defaultdict
from importlib import resources
from typing import List
from uuid import uuid4
import pandas as pd
from treelib import Tree
def get_temperature_data(filepath: str) -> Dict:
    """
    Load the temperature samples from a csv file

    The file must have a "name" column; every other column of a row is
    kept as a sample for that name.

    :param filepath: path to the csv file
    :return: dictionary keyed by the "name" column, each value being the
             rest of that row (column header -> raw string value)
    """
    # newline="" leaves line-ending handling to the csv module, which is
    # what the csv documentation asks for when passing a file object
    with open(filepath, newline="") as csvfile:
        data = {}
        for row in csv.DictReader(csvfile):
            # A later row with the same name replaces the earlier one
            data[row.pop("name")] = row
    return data
def get_author_book_publisher_data(filepath: str) -> pd.DataFrame:
    """
    Read the author/book/publisher csv file into a DataFrame

    :param filepath: location of the csv file
    :return: DataFrame holding the file's contents
    """
    frame = pd.read_csv(filepath)
    return frame
def get_average_temp_by_date(
date_string: str, temperature_data: Dict
) -> float:
"""
This function gets the average temperature for all the samples
taken by the students by date
:param date_string: date to find average temperature for
:param connection: database connection
:return: average temp for date, or None if not found
def get_total_number_of_books_by_publishers(data, ascending=True) -> List:
"""
# Target date
target_date = datetime.strptime(date_string, "%Y-%m-%d").date()
:param data: author/book/publisher data
:param direction: direction to sort the data by
:return: List of sorted data
"""
return (
data.loc[:, ["title", "publisher"]]
.groupby("publisher")["title"]
.count()
.sort_values(ascending=ascending)
)
# Iterate through the data and get the data
data = []
for samples in temperature_data.values():
# Iterate through the samples
for sample_date, sample in samples.items():
def get_total_number_of_authors_by_publishers(data, ascending=True) -> List:
"""
:param data: author/book/publisher data
:param direction: direction to sort the data by
:return: List of sorted data
"""
return (
data.assign(name=data.fname.str.cat(data.lname, sep=" "))
.loc[:, ["name", "publisher"]]
.groupby("publisher")["name"]
.nunique()
.sort_values(ascending=ascending)
)
# Generate a date range for the sample
min_date = datetime.strptime(
sample_date, "%Y-%m-%d"
).date() + timedelta(days=-3)
max_date = datetime.strptime(
sample_date, "%Y-%m-%d"
).date() + timedelta(days=3)
if min_date <= target_date <= max_date:
data.append(float(sample))
# Get the average temp
return sum(data) / len(data)
def add_new_book(data, author_name, book_title, publisher_name):
    """
    This function adds a new book to the data

    The author and the publisher must already be present in the data, and
    the book title must not be.

    :param data: author/book/publisher data
    :param author_name: author's full name; split on the first space into
                        first and last name
    :param book_title: book title
    :param publisher_name: publishers name
    :return: new DataFrame with the book appended; the input is unchanged
    :raises Exception: if the book exists, or if the author or publisher
                       can't be found
    """
    # Does the book exist?
    if book_title in data["title"].values:
        raise Exception("Book exists", book_title)

    # Does the author exist? Compare for equality: a substring/regex match
    # would accept "Stephen" for an author named "Stephenie"
    fname, _, lname = author_name.partition(" ")
    author_rows = (data["fname"] == fname) & (data["lname"] == lname)
    if not author_rows.any():
        raise Exception("No author found", author_name)

    # Does the publisher exist?
    if publisher_name not in data["publisher"].values:
        raise Exception("No publisher found", publisher_name)

    # Add the new book. DataFrame.append() was removed in pandas 2.0, so
    # build a one-row frame and concatenate it instead
    new_row = pd.DataFrame(
        [
            {
                "fname": fname,
                "lname": lname,
                "title": book_title,
                "publisher": publisher_name,
            }
        ]
    )
    return pd.concat([data, new_row], ignore_index=True)
def get_average_temp_sorted(direction: str, temperature_data: Dict) -> List:
dir = direction.lower()
if dir not in ["asc", "desc"]:
raise Exception(f"Unknown direction: {direction}")
results = defaultdict(int)
for data in temperature_data.values():
for date, value in data.items():
results[date] += float(value)
def output_hierarchical_author_data(data):
"""
This function outputs the author/book/publisher information in
a hierarchical manner
for date, total in results.items():
results[date] = float(total) / float(len(temperature_data.keys()))
:param authors: the collection of root author objects
:return: None
"""
authors = data.assign(name=data.fname.str.cat(data.lname, sep=" "))
# Convert dictionary to list
results = results.items()
authors_tree = Tree()
authors_tree.create_node("Authors", "authors")
for author, books in authors.groupby("name"):
authors_tree.create_node(author, author, parent="authors")
for book, publishers in books.groupby("title")["publisher"]:
authors_tree.create_node(book, book, parent=author)
for publisher in publishers:
authors_tree.create_node(publisher, uuid4(), parent=book)
# Sort the list in the appropriate order
return sorted(
results, key=lambda v: v[1], reverse=False if dir == "asc" else True
)
# Output the hierarchical authors data
authors_tree.show()
def main():
"""
The main entry point of the program
"""
print("starting")
# Get the temperature data into a dictionary structure
filepath = resource_filename("project.data", "temp_data.csv")
temperature_data = get_temperature_data(filepath)
# Connect to the database using SqlAlchemy
with resources.path(
"project.data", "author_book_publisher.csv"
) as filepath:
author_book_publisher_data = get_author_book_publisher_data(filepath)
# Get the total number of books printed by each publisher
total_books_by_publisher = get_total_number_of_books_by_publishers(
author_book_publisher_data, ascending=False
)
# Get the average temperature by date
date_string = "2019-02-10"
average_temp = get_average_temp_by_date(date_string, temperature_data)
print(f"Average temp {date_string}: {average_temp:.2f}")
for publisher, total_books in total_books_by_publisher.items():
print(f"Publisher: {publisher}, total books: {total_books}")
print()
# Get the average temps for the year sorted ascending or descending
average_temps = get_average_temp_sorted("asc", temperature_data)
for date, average_temp in average_temps:
print(f"Date: {date}, average temp: {average_temp:.2f}")
# Get the total number of authors each publisher publishes
total_authors_by_publisher = get_total_number_of_authors_by_publishers(
author_book_publisher_data, ascending=False
)
for publisher, total_authors in total_authors_by_publisher.items():
print(f"Publisher: {publisher}, total authors: {total_authors}")
print()
# Output hierarchical authors data
output_hierarchical_author_data(author_book_publisher_data)
# Add a new book to the data structure
author_book_publisher_data = add_new_book(
author_book_publisher_data,
author_name="Stephen King",
book_title="The Stand",
publisher_name="Random House",
)
# Output the updated hierarchical authors data
output_hierarchical_author_data(author_book_publisher_data)
print("finished")
......
# Example 2
This example uses temp_data.db database
file to get data into the program and run
various functions on it that use Sqlite SQL
to access the data.
In particular getting the average temperature
for a date across all the samples, and getting the
average temperature for every week for the entire
year and returning it sorted.
\ No newline at end of file
"""
This program gathers information from the database file about temperature
This program gathers information from the temp_data.csv file about temperature
"""
from pkg_resources import resource_filename
from datetime import datetime
from datetime import timedelta
import sqlite3
from typing import List
from uuid import uuid4
from sqlalchemy import create_engine
from sqlalchemy import and_
from sqlalchemy.sql import func, asc, desc
from sqlalchemy.orm import sessionmaker
from treelib import Tree
from project.modules.models import Author
from project.modules.models import Book
from project.modules.models import Publisher
def get_total_number_of_books_by_publishers(session, direction: str) -> List:
    """
    Build the query for publishers and the total number of books
    they've published

    :param session: database session to work with
    :param direction: "asc" or "desc", the order to sort the totals in
    :return: the query, selecting the publisher name and "total_books"
    :raises Exception: if direction isn't "asc" or "desc"
    """
    if direction not in ("asc", "desc"):
        raise Exception(f"Unknown direction: {direction}")

    if direction == "desc":
        ordering = desc
    else:
        ordering = asc

    # One row per publisher, counting the books joined to it
    book_total = func.count(Book.title).label("total_books")
    query = session.query(Publisher.name, book_total)
    query = query.join(Publisher.books)
    query = query.group_by(Publisher.name)
    return query.order_by(ordering("total_books"))
def get_average_temp_by_date(date_string, connection):
def get_total_number_of_authors_by_publishers(session, direction: str) -> List:
"""
This function gets the average temperature for all the samples
taken by the students by date
Get a list of publishers and the total number of authors
they've published
:param date_string: date to find average temperature for
:param connection: database connection
:return: average temp for date, or None if not found
:param session: database session to work with
:param direction:
:return:
"""
# Target date
target_date = datetime.strptime(date_string, "%Y-%m-%d").date()
min_date = target_date + timedelta(days=-3)
max_date = target_date + timedelta(days=3)
cursor = connection.cursor()
sql = """
SELECT
AVG(value) AS average
FROM temperature_data
WHERE date BETWEEN ? and ?
if direction not in ["asc", "desc"]:
raise Exception(f"Unknown direction: {direction}")
dir = desc if direction == "desc" else asc
return (
session.query(
Publisher.name, func.count(Author.fname).label("total_authors")
)
.join(Publisher.authors)
.group_by(Publisher.name)
.order_by(dir("total_authors"))
)
def get_authors(session) -> List:
"""
result = cursor.execute(sql, (min_date, max_date)).fetchone()
return result[0] if result else None
This function returns a list of author objects
:param session: database session to work with
:return: list of Author objects
"""
return session.query(Author).order_by(Author.lname).all()
def get_average_temp_sorted(direction: str, connection) -> list:
dir = direction.lower()
if dir not in ["asc", "desc"]:
raise Exception(f"Unknown direction: {direction}")
def add_new_item(session, author_name, book_title, publisher_name):
    """
    This function adds a new item to the database

    Existing author, book and publisher rows are reused; missing ones are
    created, and the three are then associated with each other.

    :param session: database session to work with
    :param author_name: authors full name; split on the first space into
                        first and last name
    :param book_title: book title
    :param publisher_name: publisher of book
    :return: None
    :raises ValueError: if author_name has no space separating the names
    :raises Exception: if the author, book and publisher all exist already
    """
    # partition() keeps last names that themselves contain spaces intact
    fname, separator, lname = author_name.partition(" ")
    if not separator:
        raise ValueError(
            f"Author name needs a first and last name: {author_name}"
        )

    # Get the author if exists
    author = (
        session.query(Author)
        .filter(and_(Author.fname == fname, Author.lname == lname))
        .one_or_none()
    )
    # Get the book if exists
    book = (
        session.query(Book)
        .filter(Book.title == book_title)
        .one_or_none()
    )
    # Get the publisher if exists
    publisher = (
        session.query(Publisher)
        .filter(Publisher.name == publisher_name)
        .one_or_none()
    )

    # Does new item exist?
    # NOTE(review): this only checks that the three rows exist, not that
    # they are associated with each other - confirm that is intended
    if author is not None and book is not None and publisher is not None:
        raise Exception(
            "New item exists",
            author_name,
            book_title,
            publisher_name
        )

    # Create whichever of the three didn't exist
    if author is None:
        author = Author(fname=fname, lname=lname)
    if book is None:
        book = Book(title=book_title)
    if publisher is None:
        publisher = Publisher(name=publisher_name)

    # Add the book to the author's books collection if didn't exist
    if book not in author.books:
        author.books.append(book)
    # Add the author to the publisher's collection if didn't exist
    if author not in publisher.authors:
        publisher.authors.append(author)
    # Add the book to the publisher's collection if didn't exist
    if book not in publisher.books:
        publisher.books.append(book)

    # Objects created above are unknown to the session until added, so a
    # commit would otherwise persist nothing when all three are new.
    # Adding an object the session already tracks changes nothing.
    session.add_all([author, book, publisher])

    # Commit to the database
    session.commit()
def output_hierarchical_author_data(authors):
"""
This function outputs the author/book/publisher information in
a hierarchical manner
cursor = connection.cursor()
sql = f"""
SELECT
date,
AVG(value) AS average_temp
FROM temperature_data
GROUP BY date
ORDER BY average_temp {dir}
:param authors: the collection of root author objects
:return: None
"""
results = cursor.execute(sql).fetchall()
return results
authors_tree = Tree()
authors_tree.create_node("Authors", "authors")
for author in authors:
authors_tree.create_node(
f"{author.fname} {author.lname}",
f"{author.fname} {author.lname}",
parent="authors",
)
for book in author.books:
authors_tree.create_node(
f"{book.title}",
f"{book.title}",
parent=f"{author.fname} {author.lname}",
)
for publisher in book.publishers:
authors_tree.create_node(
f"{publisher.name}", uuid4(), parent=f"{book.title}"
)
# Output the hierarchical authors data
authors_tree.show()
def main():
"""
Main entry point of program
"""
print("starting")
# Connect to the sqlite database
sqlite_filepath = resource_filename("project.data", "temp_data.db")
connection = sqlite3.connect(sqlite_filepath)
# Get the average temperature by date
date_string = "2019-02-10"
average_temp = get_average_temp_by_date(date_string, connection)
print(f"Average temp {date_string}: {average_temp:.2f}")
# Connect to the database using SqlAlchemy
sqlite_filepath = resource_filename(
"project.data", "author_book_publisher.db"
)
engine = create_engine(f"sqlite:///{sqlite_filepath}")
Session = sessionmaker()
Session.configure(bind=engine)
session = Session()
# Get the total number of books printed by each publisher
total_books_by_publisher = get_total_number_of_books_by_publishers(
session, "desc"
)
for row in total_books_by_publisher:
print(f"Publisher: {row.name}, total books: {row.total_books}")
print()
# Get the average temps for the year sorted ascending or descending
average_temps = get_average_temp_sorted("asc", connection)
for date, average_temp in average_temps:
print(f"Date: {date}, average temp: {average_temp:.2f}")
# Get the total number of authors each publisher publishes
total_authors_by_publisher = get_total_number_of_authors_by_publishers(
session, "desc"
)
for row in total_authors_by_publisher:
print(f"Publisher: {row.name}, total authors: {row.total_authors}")
print()
# Output hierarchical authors data
authors = get_authors(session)
output_hierarchical_author_data(authors)
# Add a new book
add_new_item(
session,
author_name="Stephen King",
book_title="The Stand",
publisher_name="Random House",
)
# Output the updated hierarchical authors data
authors = get_authors(session)
output_hierarchical_author_data(authors)
print("finished")
......
# Example 3
This example uses SqlAlchemy to access the temp_data.db
database to get data into the program and run
various SqlAlchemy object methods to get the data.
In particular getting the average temperature
for a date across all the samples, and getting a list
of all the average temps for the year in sorted order.
\ No newline at end of file
......@@ -11,7 +11,7 @@ Chinook Database Web Server
{% endblock %}
{% block navbar %}
<nav class="navbar navbar-expand-lg fixed-top navbar-dark" style="background-color: #563D7C;">
<nav class="navbar navbar-expand-lg fixed-top navbar-dark" style="background-color: #361Ddc;">
<a class="navbar-brand" href="#">Example 8</a>
<button class="navbar-toggler" type="button" data-toggle="collapse" data-target="#navbarNavAltMarkup" aria-controls="navbarNavAltMarkup" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
......
"""
This program gathers information from the temp_data.db database about temperature
"""
from pkg_resources import resource_filename
from datetime import datetime
from datetime import timedelta
from sqlalchemy import create_engine
from sqlalchemy import Column, String, Integer, Float, Date
from sqlalchemy.sql import func, and_, asc, desc
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
class TemperatureData(Base):
    """SqlAlchemy mapping for rows of the temperature_data table"""

    __tablename__ = "temperature_data"
    # Integer primary key of the row
    id = Column(Integer, primary_key=True)
    # NOTE(review): presumably identifies who took the sample - confirm
    name = Column(String)
    # Date of the sample; the queries in this file filter and group on it
    date = Column(Date)
    # Sample value; the queries in this file average it
    value = Column(Float)
def get_average_temp_by_date(date_string, session) -> float:
    """
    Average every sample taken within three days either side of a date

    :param date_string: date to find average temperature for (YYYY-MM-DD)
    :param session: SqlAlchemy session
    :return: average temp for date, or None if not found
    :raises ValueError: if date_string isn't in YYYY-MM-DD form
    """
    window = timedelta(days=3)
    target_date = datetime.strptime(date_string, "%Y-%m-%d").date()
    earliest = target_date - window
    latest = target_date + window

    # Restrict the average to samples inside the date window
    in_window = and_(
        TemperatureData.date >= earliest,
        TemperatureData.date <= latest,
    )
    query = session.query(func.avg(TemperatureData.value)).filter(in_window)
    row = query.one()
    return row[0]
def get_average_temp_sorted(direction: str, session) -> list:
    """
    Get the average temperature for every date, sorted by that average

    :param direction: "asc" or "desc" in any letter case
    :param session: SqlAlchemy session
    :return: list of rows with "date" and "average_temp"
    :raises Exception: if direction isn't "asc" or "desc"
    """
    wanted = direction.lower()
    if wanted != "asc" and wanted != "desc":
        raise Exception(f"Unknown direction: {direction}")

    ordering = asc if wanted == "asc" else desc
    average = func.avg(TemperatureData.value).label("average_temp")
    query = session.query(TemperatureData.date, average)
    query = query.group_by(TemperatureData.date)
    return query.order_by(ordering("average_temp")).all()
def main():
    """
    Entry point: print temperature averages read from temp_data.db
    """
    print("starting")

    # Open a SqlAlchemy session on the sqlite database file
    db_path = resource_filename("project.data", "temp_data.db")
    engine = create_engine(f"sqlite:///{db_path}")
    session = sessionmaker(bind=engine)()

    # Average temperature around a single date
    date_string = "2019-02-10"
    average_temp = get_average_temp_by_date(date_string, session)
    print(f"Average temp {date_string}: {average_temp:.2f}")
    print()

    # Average temperature of every date, lowest average first
    for row in get_average_temp_sorted("asc", session):
        print(f"Date: {row.date}, average temp: {row.average_temp:.2f}")
    print()

    print("finished")


if __name__ == "__main__":
    main()
# Example 4
This example uses the raw author_book_publisher.csv file to
get data into the program and run various python functions
on it.
The csv file uses redundant data to hold relationship data
in the file. The main.py program examines this file to use
those relationships to get different kinds of information
from the file.
\ No newline at end of file
"""
This is the example 4 program file
"""
import csv
import copy
from pkg_resources import resource_filename
from typing import List
from collections import defaultdict
from uuid import uuid4
from treelib import Tree
def get_author_book_publisher_data(filepath: str) -> List:
    """
    This function gets the author/book/publisher data from the csv file

    :param filepath: path to the csv file
    :return: list with one dictionary per row, keyed by column header
    """
    # newline="" leaves line-ending handling to the csv module, so line
    # breaks inside quoted fields are returned unchanged
    with open(filepath, newline="") as csvfile:
        return list(csv.DictReader(csvfile))
def get_total_number_of_books_by_publishers(data, direction) -> List:
    """
    Count the rows recorded against each publisher

    :param data: author/book/publisher data
    :param direction: "asc" or "desc", the order to sort the totals in
    :return: list of (publisher, total) tuples sorted by the total
    :raises Exception: if direction isn't "asc" or "desc"
    """
    if direction != "asc" and direction != "desc":
        raise Exception(f"Unknown direction: {direction}")

    # Tally one book per row against that row's publisher
    totals = {}
    for record in data:
        name = record["publisher"]
        totals[name] = totals.get(name, 0) + 1

    # Order the (publisher, total) pairs by their total
    ranked = list(totals.items())
    ranked.sort(key=lambda pair: pair[1], reverse=direction == "desc")
    return ranked
def get_total_number_of_authors_by_publishers(data, direction: str) -> List:
    """
    Count the distinct authors recorded against each publisher

    :param data: author/book/publisher data
    :param direction: "asc" or "desc", the order to sort the totals in
    :return: list of (publisher, total authors) tuples sorted by the total
    :raises Exception: if direction isn't "asc" or "desc"
    """
    if direction not in ["asc", "desc"]:
        raise Exception("Unknown direction", direction)

    # A set per publisher removes duplicate authors in a single pass,
    # without scanning a list for every row
    authors_by_publisher = defaultdict(set)
    for row in data:
        author = f"{row['fname']} {row['lname']}"
        authors_by_publisher[row["publisher"]].add(author)

    return sorted(
        (
            (publisher, len(authors))
            for publisher, authors in authors_by_publisher.items()
        ),
        key=lambda item: item[1],
        reverse=direction == "desc",
    )
def get_authors(data) -> dict:
    """
    This function returns the authors, including their hierarchical
    data

    :param data: author/book/publisher data
    :return: dictionary of author name -> {book title -> set of the
             publishers of that book}
    """
    authors = {}
    for row in data:
        author = f"{row['fname']} {row['lname']}"
        books = authors.setdefault(author, {})
        # setdefault() keeps the publishers already collected for a title;
        # assigning a fresh set per row would keep only the last publisher
        books.setdefault(row["title"], set()).add(row["publisher"])
    return authors
def add_new_item(data, author_name, book_title, publisher_name):
    """
    This function adds a new item (author, book, publisher) to the data

    :param data: author/book/publisher data
    :param author_name: author's full name; split on the first space into
                        first and last name
    :param book_title: book title
    :param publisher_name: publishers name
    :return: updated data, as a deep copy so the input isn't modified
    :raises ValueError: if author_name has no space separating the names
    :raises Exception: if the author/book/publisher row already exists
    """
    # partition() keeps last names that themselves contain spaces intact
    fname, separator, lname = author_name.partition(" ")
    if not separator:
        raise ValueError(
            f"Author name needs a first and last name: {author_name}"
        )

    # Does the new item exist already?
    new_item_exists = any(
        author_name == f"{row['fname']} {row['lname']}"
        and book_title == row["title"]
        and publisher_name == row["publisher"]
        for row in data
    )
    if new_item_exists:
        raise Exception(
            "New item exists",
            author_name,
            book_title,
            publisher_name
        )

    # Don't modify the input data
    new_data = copy.deepcopy(data)

    # Add the new item
    new_data.append(
        {
            "fname": fname,
            "lname": lname,
            "title": book_title,
            "publisher": publisher_name,
        }
    )
    return new_data
def output_hierarchical_author_data(authors):
    """
    Show the authors as a tree of author -> book -> publisher

    :param authors: dictionary of author name -> {book title -> publishers}
    :return: None
    """
    root_id = "authors"
    tree = Tree()
    tree.create_node("Authors", root_id)

    for author_name in authors:
        # Author names and book titles double as the node identifiers
        tree.create_node(author_name, author_name, parent=root_id)
        for title in authors[author_name]:
            tree.create_node(title, title, parent=author_name)
            for publisher_name in authors[author_name][title]:
                # Publisher nodes get a fresh uuid4 identifier instead of
                # using the publisher name
                tree.create_node(publisher_name, uuid4(), parent=title)

    # Output the hierarchical authors data
    tree.show()
def main():
    """
    Entry point: report on the author_book_publisher.csv data
    """
    print("starting")

    # Load the csv rows into memory
    csv_path = resource_filename("project.data", "author_book_publisher.csv")
    author_book_publisher_data = get_author_book_publisher_data(csv_path)

    # Books per publisher, sorted descending
    books_report = get_total_number_of_books_by_publishers(
        author_book_publisher_data, "desc"
    )
    for publisher, total_books in books_report:
        print(f"Publisher: {publisher}, total books: {total_books}")
    print()

    # Authors per publisher, sorted descending
    authors_report = get_total_number_of_authors_by_publishers(
        author_book_publisher_data, "desc"
    )
    for publisher, total_authors in authors_report:
        print(f"Publisher: {publisher}, total authors: {total_authors}")
    print()

    # Show the hierarchy as loaded
    output_hierarchical_author_data(get_authors(author_book_publisher_data))

    # Add a new book, then show the updated hierarchy
    author_book_publisher_data = add_new_item(
        author_book_publisher_data,
        author_name="Stephen King",
        book_title="The Stand",
        publisher_name="Random House",
    )
    output_hierarchical_author_data(get_authors(author_book_publisher_data))

    print("finished")


if __name__ == "__main__":
    main()
"""
This is the example 4 program file
"""
from importlib import resources
from typing import List
from uuid import uuid4
import pandas as pd
from treelib import Tree
def get_author_book_publisher_data(filepath: str) -> pd.DataFrame:
    """
    Read the author/book/publisher csv file into a DataFrame

    :param filepath: location of the csv file
    :return: DataFrame holding the file's contents
    """
    frame = pd.read_csv(filepath)
    return frame
def get_total_number_of_books_by_publishers(
    data, ascending=True
) -> pd.Series:
    """
    Count the titles recorded against each publisher

    :param data: author/book/publisher data
    :param ascending: True to sort the totals ascending, False descending
    :return: Series of totals indexed by publisher, sorted by the total
    """
    totals = data.groupby("publisher")["title"].count()
    return totals.sort_values(ascending=ascending)
def get_total_number_of_authors_by_publishers(
    data, ascending=True
) -> pd.Series:
    """
    Count the distinct authors recorded against each publisher

    :param data: author/book/publisher data
    :param ascending: True to sort the totals ascending, False descending
    :return: Series of totals indexed by publisher, sorted by the total
    """
    # An author is identified by first and last name joined with a space
    full_names = data.fname.str.cat(data.lname, sep=" ")
    totals = data.assign(name=full_names).groupby("publisher")["name"].nunique()
    return totals.sort_values(ascending=ascending)
def add_new_book(data, author_name, book_title, publisher_name):
    """
    This function adds a new book to the data

    The author and the publisher must already be present in the data, and
    the book title must not be.

    :param data: author/book/publisher data
    :param author_name: author's full name; split on the first space into
                        first and last name
    :param book_title: book title
    :param publisher_name: publishers name
    :return: new DataFrame with the book appended; the input is unchanged
    :raises Exception: if the book exists, or if the author or publisher
                       can't be found
    """
    # Does the book exist?
    if book_title in data["title"].values:
        raise Exception("Book exists", book_title)

    # Does the author exist? Compare for equality: a substring/regex match
    # would accept "Stephen" for an author named "Stephenie"
    fname, _, lname = author_name.partition(" ")
    author_rows = (data["fname"] == fname) & (data["lname"] == lname)
    if not author_rows.any():
        raise Exception("No author found", author_name)

    # Does the publisher exist?
    if publisher_name not in data["publisher"].values:
        raise Exception("No publisher found", publisher_name)

    # Add the new book. DataFrame.append() was removed in pandas 2.0, so
    # build a one-row frame and concatenate it instead
    new_row = pd.DataFrame(
        [
            {
                "fname": fname,
                "lname": lname,
                "title": book_title,
                "publisher": publisher_name,
            }
        ]
    )
    return pd.concat([data, new_row], ignore_index=True)
def output_hierarchical_author_data(data):
    """
    Show the data as a tree of author -> book -> publisher

    :param data: author/book/publisher data
    :return: None
    """
    # Authors are grouped by first and last name joined with a space
    full_names = data.fname.str.cat(data.lname, sep=" ")
    authors = data.assign(name=full_names)

    root_id = "authors"
    tree = Tree()
    tree.create_node("Authors", root_id)
    for author, books in authors.groupby("name"):
        # Author names and book titles double as the node identifiers
        tree.create_node(author, author, parent=root_id)
        for book, publishers in books.groupby("title")["publisher"]:
            tree.create_node(book, book, parent=author)
            for publisher in publishers:
                # Publisher nodes get a fresh uuid4 identifier instead of
                # using the publisher name
                tree.create_node(publisher, uuid4(), parent=book)

    # Output the hierarchical authors data
    tree.show()
def main():
    """
    Entry point: report on the author_book_publisher.csv data with pandas
    """
    print("starting")

    # Load the packaged csv file into a DataFrame
    with resources.path(
        "project.data", "author_book_publisher.csv"
    ) as filepath:
        author_book_publisher_data = get_author_book_publisher_data(filepath)

    # Books per publisher, sorted descending
    books_report = get_total_number_of_books_by_publishers(
        author_book_publisher_data, ascending=False
    )
    for publisher, total_books in books_report.items():
        print(f"Publisher: {publisher}, total books: {total_books}")
    print()

    # Authors per publisher, sorted descending
    authors_report = get_total_number_of_authors_by_publishers(
        author_book_publisher_data, ascending=False
    )
    for publisher, total_authors in authors_report.items():
        print(f"Publisher: {publisher}, total authors: {total_authors}")
    print()

    # Show the hierarchy as loaded
    output_hierarchical_author_data(author_book_publisher_data)

    # Add a new book, then show the updated hierarchy
    author_book_publisher_data = add_new_book(
        author_book_publisher_data,
        author_name="Stephen King",
        book_title="The Stand",
        publisher_name="Random House",
    )
    output_hierarchical_author_data(author_book_publisher_data)

    print("finished")


if __name__ == "__main__":
    main()
# Example 5
This example uses Sqlite and SQL queries to access the
author_book_publisher.db database file to
get data into the program and run various python functions
on it.
The database captures the relationships between tables, and
this is used to generate interesting data about the database.
However, the original CSV file is essentially re-created in
the *get_authors()* function in order to create the authors
hierarchical data necessary to generate the tree view
presented by the main program.
"""
Example 5 program file
"""
from pkg_resources import resource_filename
import sqlite3
from uuid import uuid4
from typing import List
from treelib import Tree
def get_total_number_of_books_by_publishers(connection, direction) -> List:
    """
    Get the publishers and the total number of books each has published

    :param connection: connection to the database
    :param direction: "asc" or "desc", the order to sort the totals in
    :return: list of (publisher name, total books) rows sorted by total
    :raises Exception: if direction isn't "asc" or "desc"
    """
    if direction != "asc" and direction != "desc":
        raise Exception("Unknown direction", direction)

    # direction was checked against the two allowed values above, so it is
    # safe to place in the ORDER BY clause
    sql = f"""
        SELECT
        p.name AS publisher_name,
        COUNT(b.title) AS total_books
        FROM publisher p
        JOIN book_publisher bp ON bp.publisher_id = p.publisher_id
        JOIN book b ON b.book_id = bp.book_id
        GROUP BY publisher_name
        ORDER BY total_books {direction};
    """
    cursor = connection.cursor()
    cursor.execute(sql)
    return cursor.fetchall()
def get_total_number_of_authors_by_publishers(connection, direction) -> List:
    """
    Get the publishers and the total number of authors each has published

    :param connection: connection to the database
    :param direction: "asc" or "desc", the order to sort the totals in
    :return: list of (publisher name, total authors) rows sorted by total
    :raises Exception: if direction isn't "asc" or "desc"
    """
    if direction != "asc" and direction != "desc":
        raise Exception("Unknown direction", direction)

    # direction was checked against the two allowed values above, so it is
    # safe to place in the ORDER BY clause
    sql = f"""
        SELECT
        p.name AS publisher_name,
        COUNT(a.lname) AS total_authors
        FROM publisher p
        JOIN author_publisher ap ON ap.publisher_id = p.publisher_id
        JOIN author a ON a.author_id = ap.author_id
        GROUP BY publisher_name
        ORDER BY total_authors {direction};
    """
    cursor = connection.cursor()
    cursor.execute(sql)
    return cursor.fetchall()
def get_authors(connection) -> dict:
    """
    This method gets the authors data and builds a hierarchical data
    structure from it for use by the treelib

    :param connection: connection to the database
    :return: dictionary of author name -> {book title -> set of the
             names of the publishers of that book}
    """
    cursor = connection.cursor()
    # Publishers are reached through book_publisher so that each book is
    # paired with its own publishers; going through author_publisher would
    # pair every book with every publisher of its author
    sql = """
        SELECT
        a.fname || ' ' || a.lname AS author,
        b.title,
        p.name
        FROM author a
        JOIN book b ON b.author_id = a.author_id
        JOIN book_publisher bp ON bp.book_id = b.book_id
        JOIN publisher p ON p.publisher_id = bp.publisher_id
    """
    authors = {}
    for author, title, publisher in cursor.execute(sql).fetchall():
        books = authors.setdefault(author, {})
        # setdefault() keeps the publishers already collected for a title;
        # assigning a fresh set per row would keep only the last publisher
        books.setdefault(title, set()).add(publisher)
    return authors
def _fetch_first_column(cursor, sql, params):
    """Run a query and return the first column of its first row, or None"""
    row = cursor.execute(sql, params).fetchone()
    return row[0] if row is not None else None


def add_new_item(connection, author_name, book_title, publisher_name):
    """
    This function adds a new item to the database

    Existing author, book and publisher rows are reused; missing ones are
    inserted, followed by any missing author/publisher and book/publisher
    association rows.

    :param connection: connection to the database
    :param author_name: authors full name; split on the first space into
                        first and last name
    :param book_title: book title
    :param publisher_name: publisher of book
    :return: None
    :raises ValueError: if author_name has no space separating the names
    :raises Exception: if the author, book and publisher all exist already
    """
    # partition() keeps last names that themselves contain spaces intact
    fname, separator, lname = author_name.partition(" ")
    if not separator:
        raise ValueError(
            f"Author name needs a first and last name: {author_name}"
        )

    cursor = connection.cursor()

    # Look up the ids of whichever rows exist in the database already
    author_id = _fetch_first_column(
        cursor,
        "SELECT author_id FROM author WHERE fname = ? AND lname = ?",
        (fname, lname),
    )
    book_id = _fetch_first_column(
        cursor, "SELECT book_id FROM book WHERE title = ?", (book_title,)
    )
    publisher_id = _fetch_first_column(
        cursor,
        "SELECT publisher_id FROM publisher WHERE name = ?",
        (publisher_name,),
    )

    # Does new item exist?
    # NOTE(review): this only checks that the three rows exist, not that
    # they are associated with each other - confirm that is intended
    if (
        author_id is not None
        and book_id is not None
        and publisher_id is not None
    ):
        raise Exception(
            "New item exists", author_name, book_title, publisher_name
        )

    try:
        # Create the author if didn't exist
        if author_id is None:
            cursor.execute(
                "INSERT INTO author (fname, lname) VALUES(?, ?)",
                (fname, lname),
            )
            author_id = cursor.lastrowid

        # Create the book if didn't exist
        if book_id is None:
            cursor.execute(
                "INSERT INTO book (author_id, title) VALUES(?, ?)",
                (author_id, book_title),
            )
            book_id = cursor.lastrowid

        # Create the publisher if didn't exist; the column is "name", the
        # same one the publisher lookup above filters on
        if publisher_id is None:
            cursor.execute(
                "INSERT INTO publisher (name) VALUES(?)", (publisher_name,)
            )
            publisher_id = cursor.lastrowid

        # Create the author publisher association if necessary
        author_publisher_exists = _fetch_first_column(
            cursor,
            "SELECT 1 FROM author_publisher"
            " WHERE author_id = ? AND publisher_id = ?",
            (author_id, publisher_id),
        )
        if author_publisher_exists is None:
            cursor.execute(
                "INSERT INTO author_publisher (author_id, publisher_id)"
                " VALUES(?, ?)",
                (author_id, publisher_id),
            )

        # Create the book publisher association if necessary
        book_publisher_exists = _fetch_first_column(
            cursor,
            "SELECT 1 FROM book_publisher"
            " WHERE book_id = ? AND publisher_id = ?",
            (book_id, publisher_id),
        )
        if book_publisher_exists is None:
            cursor.execute(
                "INSERT INTO book_publisher (book_id, publisher_id)"
                " VALUES(?, ?)",
                (book_id, publisher_id),
            )

        # Commit the transactions to the database
        connection.commit()
    except Exception:
        # Don't leave a half-inserted item waiting for a later commit
        connection.rollback()
        raise
def output_hierarchical_author_data(authors):
    """
    This function outputs the author/book/publisher information in
    a hierarchical manner

    :param authors: mapping keyed by author, whose values are mappings
                    keyed by book, whose values are iterables of
                    publishers (the shape the nested loops below rely on)
    :return: None
    """
    authors_tree = Tree()
    authors_tree.create_node("Authors", "authors")
    for author, books in authors.items():
        # Node identifiers must be unique within the tree. Using the
        # display text as the identifier breaks as soon as two authors
        # share a book title, so every node gets its own uuid and the
        # children are attached through the identifier of the node
        # that was actually created.
        author_node = authors_tree.create_node(
            author, uuid4(), parent="authors"
        )
        for book, publishers in books.items():
            book_node = authors_tree.create_node(
                book, uuid4(), parent=author_node.identifier
            )
            for publisher in publishers:
                authors_tree.create_node(
                    publisher, uuid4(), parent=book_node.identifier
                )
    # Output the hierarchical authors data
    authors_tree.show()
def main():
    """
    Main program entry point

    Connects to the sqlite database, prints the per-publisher book and
    author totals, prints the author hierarchy, adds one new book and
    prints the hierarchy again.
    """
    print("starting")
    # Connect to the sqlite database
    sqlite_filepath = resource_filename(
        "project.data", "author_book_publisher.db"
    )
    connection = sqlite3.connect(sqlite_filepath)
    # Make sure the connection is released even if one of the steps
    # below raises
    try:
        # Get the total number of books printed by each publisher
        total_books_by_publisher = get_total_number_of_books_by_publishers(
            connection, "desc"
        )
        for publisher, total_books in total_books_by_publisher:
            print(f"Publisher: {publisher}, total books: {total_books}")
        print()
        # Get the total number of authors each publisher publishes
        total_authors_by_publisher = (
            get_total_number_of_authors_by_publishers(connection, "desc")
        )
        for publisher, total_authors in total_authors_by_publisher:
            print(f"Publisher: {publisher}, total authors: {total_authors}")
        print()
        # Output hierarchical authors data
        authors = get_authors(connection)
        output_hierarchical_author_data(authors)
        # Add a new book
        add_new_item(
            connection,
            author_name="Stephen King",
            book_title="The Stand",
            publisher_name="Random House",
        )
        # Output the updated hierarchical authors data
        authors = get_authors(connection)
        output_hierarchical_author_data(authors)
    finally:
        connection.close()
    print("finished")


if __name__ == "__main__":
    main()
"""
This program gathers author, book and publisher information from the
author_book_publisher.db SQLite database using SQLAlchemy
"""
from pkg_resources import resource_filename
from typing import List
from uuid import uuid4
from sqlalchemy import create_engine
from sqlalchemy import and_
from sqlalchemy.sql import func, asc, desc
from sqlalchemy.orm import sessionmaker
from treelib import Tree
from project.modules.models import Author
from project.modules.models import Book
from project.modules.models import Publisher
def get_total_number_of_books_by_publishers(session, direction: str) -> List:
    """
    Get a list of publishers and the total number of books
    they've published

    :param session: database session to work with
    :param direction: sort order for the totals, either "asc" or "desc"
    :return: list of result rows, each exposing ``name`` and
             ``total_books``, ordered by ``total_books``
    :raises ValueError: if direction is neither "asc" nor "desc"
    """
    if direction not in ("asc", "desc"):
        raise ValueError(f"Unknown direction: {direction}")
    # Named so that it does not shadow the ``dir`` builtin
    order_fn = desc if direction == "desc" else asc
    return (
        session.query(
            Publisher.name, func.count(Book.title).label("total_books")
        )
        .join(Publisher.books)
        .group_by(Publisher.name)
        .order_by(order_fn("total_books"))
        # Materialise the rows so the function really returns a list,
        # as the signature and the docstring promise
        .all()
    )
def get_total_number_of_authors_by_publishers(session, direction: str) -> List:
    """
    Get a list of publishers and the total number of authors
    they've published

    :param session: database session to work with
    :param direction: sort order for the totals, either "asc" or "desc"
    :return: list of result rows, each exposing ``name`` and
             ``total_authors``, ordered by ``total_authors``
    :raises ValueError: if direction is neither "asc" nor "desc"
    """
    if direction not in ("asc", "desc"):
        raise ValueError(f"Unknown direction: {direction}")
    # Named so that it does not shadow the ``dir`` builtin
    order_fn = desc if direction == "desc" else asc
    return (
        session.query(
            Publisher.name, func.count(Author.fname).label("total_authors")
        )
        .join(Publisher.authors)
        .group_by(Publisher.name)
        .order_by(order_fn("total_authors"))
        # Materialise the rows so the function really returns a list,
        # as the signature and the docstring promise
        .all()
    )
def get_authors(session) -> List:
    """
    Fetch every author stored in the database, ordered by last name

    :param session: database session to work with
    :return: list of Author objects
    """
    authors_by_lname = session.query(Author).order_by(Author.lname)
    return authors_by_lname.all()
def add_new_item(session, author_name, book_title, publisher_name):
    """
    This function adds a new item to the database

    Any of the author, book or publisher that is not found is created,
    the three are linked to each other, and the change is committed.

    :param session: database session to work with
    :param author_name: authors full name; everything before the first
                        space is the first name, the rest the last name
    :param book_title: book title
    :param publisher_name: publisher of book
    :return: None
    :raises Exception: if the author, the book and the publisher all
                       already exist
    """
    # Split on the first space only, so that a single-word name or a
    # multi-word last name does not blow up the tuple unpacking
    fname, _, lname = author_name.partition(" ")
    # Get the author if exists
    author = (
        session.query(Author)
        .filter(and_(Author.fname == fname, Author.lname == lname))
        .one_or_none()
    )
    # Get the book if exists
    book = (
        session.query(Book)
        .filter(Book.title == book_title)
        .one_or_none()
    )
    # Get the publisher if exists
    publisher = (
        session.query(Publisher)
        .filter(Publisher.name == publisher_name)
        .one_or_none()
    )
    # Does new item exist?
    if author is not None and book is not None and publisher is not None:
        raise Exception(
            "New item exists",
            author_name,
            book_title,
            publisher_name
        )
    # Create the author if didn't exist
    if author is None:
        author = Author(fname=fname, lname=lname)
    # Create the book if didn't exist
    if book is None:
        book = Book(title=book_title)
    # Create the publisher if didn't exist
    if publisher is None:
        publisher = Publisher(name=publisher_name)
    # Add the book to the author's books collection if didn't exist
    if book not in author.books:
        author.books.append(book)
    # Add the author to the publisher's collection if didn't exist
    if author not in publisher.authors:
        publisher.authors.append(author)
    # Add the book to the publisher's collection if didn't exist
    if book not in publisher.books:
        publisher.books.append(book)
    # Newly constructed objects are not tracked by the session until
    # they are added; without this, a brand new author/book/publisher
    # trio would be silently dropped by the commit. Adding an object
    # the session already tracks is harmless.
    session.add_all([author, book, publisher])
    # Commit to the database
    session.commit()
def output_hierarchical_author_data(authors):
    """
    This function outputs the author/book/publisher information in
    a hierarchical manner

    :param authors: the collection of root author objects; each one is
                    read for ``fname``, ``lname`` and ``books``, each
                    book for ``title`` and ``publishers``, and each
                    publisher for ``name``
    :return: None
    """
    authors_tree = Tree()
    authors_tree.create_node("Authors", "authors")
    for author in authors:
        # Node identifiers must be unique within the tree. Names and
        # titles are not guaranteed to be (two authors can share a name
        # or a book title), so every node gets its own uuid and the
        # children are attached through the identifier of the node
        # that was actually created.
        author_node = authors_tree.create_node(
            f"{author.fname} {author.lname}",
            uuid4(),
            parent="authors",
        )
        for book in author.books:
            book_node = authors_tree.create_node(
                f"{book.title}",
                uuid4(),
                parent=author_node.identifier,
            )
            for publisher in book.publishers:
                authors_tree.create_node(
                    f"{publisher.name}",
                    uuid4(),
                    parent=book_node.identifier,
                )
    # Output the hierarchical authors data
    authors_tree.show()
def main():
    """
    Main entry point of program

    Opens a SQLAlchemy session on the sqlite database, prints the
    per-publisher book and author totals, prints the author hierarchy,
    adds one new book and prints the hierarchy again.
    """
    print("starting")
    # Connect to the database using SqlAlchemy
    sqlite_filepath = resource_filename(
        "project.data", "author_book_publisher.db"
    )
    engine = create_engine(f"sqlite:///{sqlite_filepath}")
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    # Make sure the session is released even if one of the steps
    # below raises
    try:
        # Get the total number of books printed by each publisher
        total_books_by_publisher = get_total_number_of_books_by_publishers(
            session, "desc"
        )
        for row in total_books_by_publisher:
            print(f"Publisher: {row.name}, total books: {row.total_books}")
        print()
        # Get the total number of authors each publisher publishes
        total_authors_by_publisher = (
            get_total_number_of_authors_by_publishers(session, "desc")
        )
        for row in total_authors_by_publisher:
            print(
                f"Publisher: {row.name}, total authors: {row.total_authors}"
            )
        print()
        # Output hierarchical authors data
        authors = get_authors(session)
        output_hierarchical_author_data(authors)
        # Add a new book
        add_new_item(
            session,
            author_name="Stephen King",
            book_title="The Stand",
            publisher_name="Random House",
        )
        # Output the updated hierarchical authors data
        authors = get_authors(session)
        output_hierarchical_author_data(authors)
    finally:
        session.close()
    print("finished")


if __name__ == "__main__":
    main()
"""
This program gathers author, book and publisher information from the
author_book_publisher.db SQLite database using SQLAlchemy
"""
from pkg_resources import resource_filename
from typing import List
from uuid import uuid4
from sqlalchemy import create_engine
from sqlalchemy import and_
from sqlalchemy.sql import func, asc, desc
from sqlalchemy.orm import sessionmaker
from treelib import Tree
from project.modules.models import Author
from project.modules.models import Book
from project.modules.models import Publisher
def get_total_number_of_books_by_publishers(session, direction: str) -> List:
    """
    Get a list of publishers and the total number of books
    they've published

    :param session: database session to work with
    :param direction: sort order for the totals, either "asc" or "desc"
    :return: list of result rows, each exposing ``name`` and
             ``total_books``, ordered by ``total_books``
    :raises ValueError: if direction is neither "asc" nor "desc"
    """
    if direction not in ("asc", "desc"):
        raise ValueError(f"Unknown direction: {direction}")
    # Named so that it does not shadow the ``dir`` builtin
    order_fn = desc if direction == "desc" else asc
    return (
        session.query(
            Publisher.name, func.count(Book.title).label("total_books")
        )
        .join(Publisher.books)
        .group_by(Publisher.name)
        .order_by(order_fn("total_books"))
        # Materialise the rows so the function really returns a list,
        # as the signature and the docstring promise
        .all()
    )
def get_total_number_of_authors_by_publishers(session, direction: str) -> List:
    """
    Get a list of publishers and the total number of authors
    they've published

    :param session: database session to work with
    :param direction: sort order for the totals, either "asc" or "desc"
    :return: list of result rows, each exposing ``name`` and
             ``total_authors``, ordered by ``total_authors``
    :raises ValueError: if direction is neither "asc" nor "desc"
    """
    if direction not in ("asc", "desc"):
        raise ValueError(f"Unknown direction: {direction}")
    # Named so that it does not shadow the ``dir`` builtin
    order_fn = desc if direction == "desc" else asc
    return (
        session.query(
            Publisher.name, func.count(Author.fname).label("total_authors")
        )
        .join(Publisher.authors)
        .group_by(Publisher.name)
        .order_by(order_fn("total_authors"))
        # Materialise the rows so the function really returns a list,
        # as the signature and the docstring promise
        .all()
    )
def get_authors(session) -> List:
    """
    Fetch every author stored in the database, ordered by last name

    :param session: database session to work with
    :return: list of Author objects
    """
    authors_by_lname = session.query(Author).order_by(Author.lname)
    return authors_by_lname.all()
def add_new_item(session, author_name, book_title, publisher_name):
    """
    This function adds a new item to the database

    Any of the author, book or publisher that is not found is created,
    the three are linked to each other, and the change is committed.

    :param session: database session to work with
    :param author_name: authors full name; everything before the first
                        space is the first name, the rest the last name
    :param book_title: book title
    :param publisher_name: publisher of book
    :return: None
    :raises Exception: if the author, the book and the publisher all
                       already exist
    """
    # Split on the first space only, so that a single-word name or a
    # multi-word last name does not blow up the tuple unpacking
    fname, _, lname = author_name.partition(" ")
    # Get the author if exists
    author = (
        session.query(Author)
        .filter(and_(Author.fname == fname, Author.lname == lname))
        .one_or_none()
    )
    # Get the book if exists
    book = (
        session.query(Book)
        .filter(Book.title == book_title)
        .one_or_none()
    )
    # Get the publisher if exists
    publisher = (
        session.query(Publisher)
        .filter(Publisher.name == publisher_name)
        .one_or_none()
    )
    # Does new item exist?
    if author is not None and book is not None and publisher is not None:
        raise Exception(
            "New item exists",
            author_name,
            book_title,
            publisher_name
        )
    # Create the author if didn't exist
    if author is None:
        author = Author(fname=fname, lname=lname)
    # Create the book if didn't exist
    if book is None:
        book = Book(title=book_title)
    # Create the publisher if didn't exist
    if publisher is None:
        publisher = Publisher(name=publisher_name)
    # Add the book to the author's books collection if didn't exist
    if book not in author.books:
        author.books.append(book)
    # Add the author to the publisher's collection if didn't exist
    if author not in publisher.authors:
        publisher.authors.append(author)
    # Add the book to the publisher's collection if didn't exist
    if book not in publisher.books:
        publisher.books.append(book)
    # Newly constructed objects are not tracked by the session until
    # they are added; without this, a brand new author/book/publisher
    # trio would be silently dropped by the commit. Adding an object
    # the session already tracks is harmless.
    session.add_all([author, book, publisher])
    # Commit to the database
    session.commit()
def output_hierarchical_author_data(authors):
    """
    This function outputs the author/book/publisher information in
    a hierarchical manner

    :param authors: the collection of root author objects; each one is
                    read for ``fname``, ``lname`` and ``books``, each
                    book for ``title`` and ``publishers``, and each
                    publisher for ``name``
    :return: None
    """
    authors_tree = Tree()
    authors_tree.create_node("Authors", "authors")
    for author in authors:
        # Node identifiers must be unique within the tree. Names and
        # titles are not guaranteed to be (two authors can share a name
        # or a book title), so every node gets its own uuid and the
        # children are attached through the identifier of the node
        # that was actually created.
        author_node = authors_tree.create_node(
            f"{author.fname} {author.lname}",
            uuid4(),
            parent="authors",
        )
        for book in author.books:
            book_node = authors_tree.create_node(
                f"{book.title}",
                uuid4(),
                parent=author_node.identifier,
            )
            for publisher in book.publishers:
                authors_tree.create_node(
                    f"{publisher.name}",
                    uuid4(),
                    parent=book_node.identifier,
                )
    # Output the hierarchical authors data
    authors_tree.show()
def main():
    """
    Main entry point of program

    Opens a SQLAlchemy session on the sqlite database, prints the
    per-publisher book and author totals, prints the author hierarchy,
    adds one new book and prints the hierarchy again.
    """
    print("starting")
    # Connect to the database using SqlAlchemy
    sqlite_filepath = resource_filename(
        "project.data", "author_book_publisher.db"
    )
    engine = create_engine(f"sqlite:///{sqlite_filepath}")
    Session = sessionmaker()
    Session.configure(bind=engine)
    session = Session()
    # Make sure the session is released even if one of the steps
    # below raises
    try:
        # Get the total number of books printed by each publisher
        total_books_by_publisher = get_total_number_of_books_by_publishers(
            session, "desc"
        )
        for row in total_books_by_publisher:
            print(f"Publisher: {row.name}, total books: {row.total_books}")
        print()
        # Get the total number of authors each publisher publishes
        total_authors_by_publisher = (
            get_total_number_of_authors_by_publishers(session, "desc")
        )
        for row in total_authors_by_publisher:
            print(
                f"Publisher: {row.name}, total authors: {row.total_authors}"
            )
        print()
        # Output hierarchical authors data
        authors = get_authors(session)
        output_hierarchical_author_data(authors)
        # Add a new book
        add_new_item(
            session,
            author_name="Stephen King",
            book_title="The Stand",
            publisher_name="Random House",
        )
        # Output the updated hierarchical authors data
        authors = get_authors(session)
        output_hierarchical_author_data(authors)
    finally:
        session.close()
    print("finished")


if __name__ == "__main__":
    main()
......@@ -21,6 +21,7 @@ book_publisher = Table(
)
class Author(Base):
__tablename__ = "author"
author_id = Column(Integer, primary_key=True)
......@@ -31,7 +32,6 @@ class Author(Base):
"Publisher", secondary=author_publisher, back_populates="authors"
)
class Book(Base):
__tablename__ = "book"
book_id = Column(Integer, primary_key=True)
......
......@@ -5,6 +5,7 @@ setup(
version="1.0",
packages=find_packages(),
install_requires=[
"pandas==1.0.3",
"Flask==1.1.1",
"SQLAlchemy==1.3.13",
"Flask-SQLAlchemy==2.4.1",
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册