提交 2abbcc53 编写于 作者: Julien Cristau

generate_releases: reduce number of sql queries for by-hash files

Instead of doing two queries per filename and per hash function, store a
set of known hashfiles up front and group the UPDATEs in a single query.

Suggested by Colin Watson.
Signed-off-by: Julien Cristau <jcristau@debian.org>
上级 d6f184c3
...@@ -362,37 +362,49 @@ class ReleaseWriter(object): ...@@ -362,37 +362,49 @@ class ReleaseWriter(object):
out.close() out.close()
os.rename(outfile + '.new', outfile) os.rename(outfile + '.new', outfile)
# Mark all by-hash files as obsolete. We will undo that for the ones
# we still reference later.
query = """ query = """
UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP UPDATE hashfile SET unreferenced = CURRENT_TIMESTAMP
WHERE suite_id = :id AND unreferenced IS NULL""" WHERE suite_id = :id AND unreferenced IS NULL"""
session.execute(query, {'id': suite.suite_id}) session.execute(query, {'id': suite.suite_id})
if suite.byhash: if suite.byhash:
query = "SELECT path FROM hashfile WHERE suite_id = :id"
q = session.execute(query, {'id': suite.suite_id})
known_hashfiles = set(row[0] for row in q)
updated = []
new = []
# Update the hashfile table with new or updated files
for filename in fileinfo: for filename in fileinfo:
if not os.path.exists(filename): if not os.path.exists(filename):
# probably an uncompressed index we didn't generate # probably an uncompressed index we didn't generate
continue continue
byhashdir = os.path.join(os.path.dirname(filename), 'by-hash')
for h in hashes: for h in hashes:
field = h.release_field field = h.release_field
hashfile = os.path.join(os.path.dirname(filename), 'by-hash', field, fileinfo[filename][field]) hashfile = os.path.join(byhashdir, field, fileinfo[filename][field])
query = "SELECT 1 FROM hashfile WHERE path = :p AND suite_id = :id" if hashfile in known_hashfiles:
q = session.execute( updated.append(hashfile)
query,
{'p': hashfile, 'id': suite.suite_id})
if q.rowcount:
session.execute('''
UPDATE hashfile SET unreferenced = NULL
WHERE path = :p and suite_id = :id''',
{'p': hashfile, 'id': suite.suite_id})
else: else:
session.execute(''' new.append(hashfile)
INSERT INTO hashfile (path, suite_id)
VALUES (:p, :id)''', if updated:
{'p': hashfile, 'id': suite.suite_id}) session.execute("""
UPDATE hashfile SET unreferenced = NULL
WHERE path = ANY(:p) AND suite_id = :id""",
{'p': updated, 'id': suite.suite_id})
if new:
session.execute("""
INSERT INTO hashfile (path, suite_id)
VALUES (:p, :id)""",
[{'p': hashfile, 'id': suite.suite_id} for hashfile in new])
session.commit() session.commit()
if suite.byhash: if suite.byhash:
# Create hardlinks in by-hash directories
for filename in fileinfo: for filename in fileinfo:
if not os.path.exists(filename): if not os.path.exists(filename):
# probably an uncompressed index we didn't generate # probably an uncompressed index we didn't generate
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册