Commit b5d21dfa authored by Joerg Jaspert

Merge commit 'pkern/master' into merge

* commit 'pkern/master':
  Revert "revert all my stupid commits, we'll try this again later when we have a test server"
  Revert "and revert the remainder"
  implement sensible handling of checksum fields in .changes and .dsc
Signed-off-by: Joerg Jaspert <joerg@debian.org>

2008-08-28  Philipp Kern  <pkern@debian.org>

  * daklib/utils.py (check_hashes): adapt to different API, check
    sizes separately
  * daklib/utils.py (parse_changes, parse_deb822): refactor
    the string-based logic of parse_changes into a new function
    parse_deb822; parse_changes itself remains file-based
  * daklib/utils.py (hash_key): gives the key of a hash in the
    files dict
  * daklib/utils.py (create_hash, check_size): made more readable
  * daklib/utils.py (check_hash): just check the hashes and complain
    about missing checksums
  * daklib/utils.py (check_hash_fields): function to reject unknown
    checksum fields
  * daklib/utils.py (_ensure_changes_hash, _ensure_dsc_hash): helper
    functions for ensure_hashes; check their corresponding manifests'
    hashes
  * daklib/utils.py (ensure_hashes): retrieve the checksums fields
    from the original filecontents blob so that they do not need to
    be present in the .dak; refactored the actual checks by calling
    the aforementioned helper functions
  * daklib/utils.py (parse_checksums): parse a given checksums field
    in a manifest and insert the values found into the files dict,
    checking the file sizes on the way

2008-09-06  Philipp Kern  <pkern@debian.org>

  * dak/process_new.py (is_source_in_queue_dir): Access the right
@@ -50,6 +83,24 @@
  * config/debian/cron.dinstall: We don't want i18n to ever fail
    dinstall, add a || true

2008-08-15  Mark Hymers  <mhy@debian.org>

  * daklib/utils.py: Actually import a module before using it.
  * daklib/utils.py: Actually check we have basedict before trying to
    use it.
  * dak/process_accepted.py, dak/process_unchecked.py,
    daklib/database.py: Don't change get_files_id to use sha1sum and
    sha256sum.
  * setup/init_pool.sql, dak/check_archive.py, dak/decode_dot_dak.py,
    dak/process_accepted.py, dak/process_unchecked.py, daklib/database.py,
    daklib/queue.py, daklib/utils.py: Attempt to add sha1sum and
    sha256sums into the database. The complication is that we have to
    keep backwards compatibility with the .dak files already in existence.
    Note that import_archive hasn't been hacked to deal with this yet.

2008-08-14  Joerg Jaspert  <joerg@debian.org>

  * config/debian/cron.dinstall: Added the i18n retrieval of package
@@ -113,7 +164,6 @@

2008-08-07  Stephen Gran  <sgran@debian.org>

  * Drop use of exec to eval variable interpolation

2008-08-07  Joerg Jaspert  <joerg@debian.org>

  * dak/process_accepted.py (install): Error out with the new
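
At a glance, the flow the diff below implements (a sketch only; `reject`
is the caller's rejection collector in dak/process_unchecked.py):

    for m in utils.ensure_hashes(changes, dsc, files, dsc_files):
        reject(m)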
@@ -52,7 +52,7 @@ Run various sanity checks of the archive and/or database.
The following MODEs are available:

-  md5sums           - validate the md5sums stored in the database
+  checksums         - validate the checksums stored in the database
   files             - check files in the database against what's in the archive
   dsc-syntax        - validate the syntax of .dsc files in the archive
   missing-overrides - check for missing overrides
@@ -194,16 +194,18 @@ SELECT l.path, f.filename FROM files f, dsc_files df, location l WHERE df.source

################################################################################

-def check_md5sums():
+def check_checksums():
    print "Getting file information from database..."
-    q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.size FROM files f, location l WHERE f.location = l.id")
+    q = projectB.query("SELECT l.path, f.filename, f.md5sum, f.sha1sum, f.sha256sum, f.size FROM files f, location l WHERE f.location = l.id")
    ql = q.getresult()

-    print "Checking file md5sums & sizes..."
+    print "Checking file checksums & sizes..."
    for i in ql:
        filename = os.path.abspath(i[0] + i[1])
        db_md5sum = i[2]
-        db_size = int(i[3])
+        db_sha1sum = i[3]
+        db_sha256sum = i[4]
+        db_size = int(i[5])
        try:
            f = utils.open_file(filename)
        except:
@@ -215,6 +217,18 @@ def check_md5sums():
            utils.warn("**WARNING** md5sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, md5sum, db_md5sum))
        if size != db_size:
            utils.warn("**WARNING** size mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, size, db_size))
+        # Until the main database is filled, we need to not spit 500,000
+        # warnings every time we scan the archive. Yet another hack (TM)
+        # which can go away once this is all working.
+        if db_sha1sum is not None and db_sha1sum != '':
+            sha1sum = apt_pkg.sha1sum(f)
+            if sha1sum != db_sha1sum:
+                utils.warn("**WARNING** sha1sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha1sum, db_sha1sum))
+        if db_sha256sum is not None and db_sha256sum != '':
+            sha256sum = apt_pkg.sha256sum(f)
+            if sha256sum != db_sha256sum:
+                utils.warn("**WARNING** sha256sum mismatch for '%s' ('%s' [current] vs. '%s' [db])." % (filename, sha256sum, db_sha256sum))

    print "Done."
@@ -425,8 +439,8 @@ def main ():
    projectB = pg.connect(Cnf["DB::Name"], Cnf["DB::Host"], int(Cnf["DB::Port"]))
    database.init(Cnf, projectB)

-    if mode == "md5sums":
-        check_md5sums()
+    if mode == "checksums":
+        check_checksums()
    elif mode == "files":
        check_files()
    elif mode == "dsc-syntax":
@@ -101,9 +101,9 @@ def main():
    for f in files.keys():
        print " %s:" % (f)
        for i in [ "package", "version", "architecture", "type", "size",
-                   "md5sum", "component", "location id", "source package",
-                   "source version", "maintainer", "dbtype", "files id",
-                   "new", "section", "priority", "pool name" ]:
+                   "md5sum", "sha1sum", "sha256sum", "component", "location id",
+                   "source package", "source version", "maintainer", "dbtype",
+                   "files id", "new", "section", "priority", "pool name" ]:
            if files[f].has_key(i):
                print "  %s: %s" % (i.capitalize(), files[f][i])
                del files[f][i]
@@ -274,6 +274,10 @@ def install ():
    # Begin a transaction; if we bomb out anywhere between here and the COMMIT WORK below, the DB will not be changed.
    projectB.query("BEGIN WORK")

+    # Check the hashes are all present: HACK: Can go away once all dak files
+    # are known to be newer than the shasum changes
+    utils.ensure_hashes(changes, dsc, files, dsc_files)
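+    # A side effect relied on below: ensure_hashes fills in
+    # files[...]["sha1sum"]/["sha256sum"] (via parse_checksums or
+    # create_hash in daklib/utils.py); its list of reject messages
+    # is discarded at this call site.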
+
    # Add the .dsc file to the DB
    for file in files.keys():
        if files[file]["type"] == "dsc":
@@ -291,7 +295,7 @@ def install ():
            dsc_component = files[file]["component"]
            dsc_location_id = files[file]["location id"]
            if not files[file].has_key("files id") or not files[file]["files id"]:
-                files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], dsc_location_id)
+                files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
            projectB.query("INSERT INTO source (source, version, maintainer, changedby, file, install_date, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, '%s', %s)"
                           % (package, version, maintainer_id, changedby_id, files[file]["files id"], install_date, fingerprint_id))
@@ -310,7 +314,7 @@ def install ():
        files_id = database.get_files_id(filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id)
        # FIXME: needs to check for -1/-2 and or handle exception
        if files_id == None:
-            files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], dsc_location_id)
+            files_id = database.set_files_id (filename, dsc_files[dsc_file]["size"], dsc_files[dsc_file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], dsc_location_id)
        projectB.query("INSERT INTO dsc_files (source, file) VALUES (currval('source_id_seq'), %d)" % (files_id))

    # Add the src_uploaders to the DB
@@ -349,7 +353,7 @@ def install ():
        if not files[file].has_key("location id") or not files[file]["location id"]:
            files[file]["location id"] = database.get_location_id(Cnf["Dir::Pool"],files[file]["component"],utils.where_am_i())
        if not files[file].has_key("files id") or not files[file]["files id"]:
-            files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["location id"])
+            files[file]["files id"] = database.set_files_id (filename, files[file]["size"], files[file]["md5sum"], files[file]["sha1sum"], files[file]["sha256sum"], files[file]["location id"])
        source_id = database.get_source_id (source, source_version)
        if source_id:
            projectB.query("INSERT INTO binaries (package, version, maintainer, source, architecture, file, type, sig_fpr) VALUES ('%s', '%s', %d, %d, %d, %d, '%s', %d)"
@@ -388,16 +392,18 @@ def install ():
    #
    if changes["architecture"].has_key("source") and orig_tar_id and \
       orig_tar_location != "legacy" and orig_tar_location != dsc_location_id:
-        q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id))
+        q = projectB.query("SELECT l.path, f.filename, f.size, f.md5sum, f.sha1sum, f.sha256sum FROM files f, location l WHERE f.id = %s AND f.location = l.id" % (orig_tar_id))
        ql = q.getresult()[0]
        old_filename = ql[0] + ql[1]
        file_size = ql[2]
        file_md5sum = ql[3]
+        file_sha1sum = ql[4]
+        file_sha256sum = ql[5]
        new_filename = utils.poolify(changes["source"], dsc_component) + os.path.basename(old_filename)
        new_files_id = database.get_files_id(new_filename, file_size, file_md5sum, dsc_location_id)
        if new_files_id == None:
            utils.copy(old_filename, Cnf["Dir::Pool"] + new_filename)
-            new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, dsc_location_id)
+            new_files_id = database.set_files_id(new_filename, file_size, file_md5sum, file_sha1sum, file_sha256sum, dsc_location_id)
        projectB.query("UPDATE dsc_files SET file = %s WHERE source = %s AND file = %s" % (new_files_id, source_id, orig_tar_id))

    # Install the files into the pool
@@ -777,6 +777,8 @@ def check_dsc():
        files[orig_tar_gz] = {}
        files[orig_tar_gz]["size"] = os.stat(orig_tar_gz)[stat.ST_SIZE]
        files[orig_tar_gz]["md5sum"] = dsc_files[orig_tar_gz]["md5sum"]
+        files[orig_tar_gz]["sha1sum"] = dsc_files[orig_tar_gz]["sha1sum"]
+        files[orig_tar_gz]["sha256sum"] = dsc_files[orig_tar_gz]["sha256sum"]
        files[orig_tar_gz]["section"] = files[dsc_filename]["section"]
        files[orig_tar_gz]["priority"] = files[dsc_filename]["priority"]
        files[orig_tar_gz]["component"] = files[dsc_filename]["component"]
@@ -917,84 +919,15 @@ def check_urgency ():

################################################################################

def check_hashes ():
-    # Make sure we recognise the format of the Files: field
-    format = changes.get("format", "0.0").split(".", 1)
-    if len(format) == 2:
-        format = int(format[0]), int(format[1])
-    else:
-        format = int(float(format[0])), 0
-
-    check_hash(".changes", files, "md5sum", apt_pkg.md5sum)
-    check_hash(".dsc", dsc_files, "md5sum", apt_pkg.md5sum)
-
-    if format >= (1,8):
-        hashes = [("sha1", apt_pkg.sha1sum),
-                  ("sha256", apt_pkg.sha256sum)]
-    else:
-        hashes = []
-
-    for x in changes:
-        if x.startswith("checksum-"):
-            h = x.split("-",1)[1]
-            if h not in dict(hashes):
-                reject("Unsupported checksum field in .changes" % (h))
-
-    for x in dsc:
-        if x.startswith("checksum-"):
-            h = x.split("-",1)[1]
-            if h not in dict(hashes):
-                reject("Unsupported checksum field in .dsc" % (h))
-
-    for h,f in hashes:
-        try:
-            fs = utils.build_file_list(changes, 0, "checksums-%s" % h, h)
-            check_hash(".changes %s" % (h), fs, h, f, files)
-        except NoFilesFieldError:
-            reject("No Checksums-%s: field in .changes" % (h))
-        except UnknownFormatError, format:
-            reject("%s: unknown format of .changes" % (format))
-        except ParseChangesError, line:
-            reject("parse error for Checksums-%s in .changes, can't grok: %s." % (h, line))
-
-        if "source" not in changes["architecture"]: continue
-
-        try:
-            fs = utils.build_file_list(dsc, 1, "checksums-%s" % h, h)
-            check_hash(".dsc %s" % (h), fs, h, f, dsc_files)
-        except UnknownFormatError, format:
-            reject("%s: unknown format of .dsc" % (format))
-        except NoFilesFieldError:
-            reject("No Checksums-%s: field in .dsc" % (h))
-        except ParseChangesError, line:
-            reject("parse error for Checksums-%s in .dsc, can't grok: %s." % (h, line))
-
-################################################################################
-
-def check_hash (where, lfiles, key, testfn, basedict = None):
-    if basedict:
-        for f in basedict.keys():
-            if f not in lfiles:
-                reject("%s: no %s checksum" % (f, key))
-
-    for f in lfiles.keys():
-        if basedict and f not in basedict:
-            reject("%s: extraneous entry in %s checksums" % (f, key))
-
-        try:
-            file_handle = utils.open_file(f)
-        except CantOpenError:
-            continue
-
-        # Check hash
-        if testfn(file_handle) != lfiles[f][key]:
-            reject("%s: %s check failed." % (f, key))
-        file_handle.close()
-        # Check size
-        actual_size = os.stat(f)[stat.ST_SIZE]
-        size = int(lfiles[f]["size"])
-        if size != actual_size:
-            reject("%s: actual file size (%s) does not match size (%s) in %s"
-                   % (f, actual_size, size, where))
+    utils.check_hash(".changes", files, "md5", apt_pkg.md5sum)
+    utils.check_size(".changes", files)
+    utils.check_hash(".dsc", dsc_files, "md5", apt_pkg.md5sum)
+    utils.check_size(".dsc", dsc_files)
+
+    # This is stupid API, but it'll have to do for now until
+    # we actually have proper abstraction
+    for m in utils.ensure_hashes(changes, dsc, files, dsc_files):
+        reject(m)
################################################################################
@@ -360,10 +360,10 @@ def get_or_set_queue_id (queue):

################################################################################

-def set_files_id (filename, size, md5sum, location_id):
+def set_files_id (filename, size, md5sum, sha1sum, sha256sum, location_id):
    global files_id_cache

-    projectB.query("INSERT INTO files (filename, size, md5sum, location) VALUES ('%s', %d, '%s', %d)" % (filename, long(size), md5sum, location_id))
+    projectB.query("INSERT INTO files (filename, size, md5sum, sha1sum, sha256sum, location) VALUES ('%s', %d, '%s', '%s', '%s', %d)" % (filename, long(size), md5sum, sha1sum, sha256sum, location_id))

    return get_files_id (filename, size, md5sum, location_id)
@@ -236,9 +236,10 @@ class Upload:
        for file_entry in files.keys():
            d_files[file_entry] = {}
            for i in [ "package", "version", "architecture", "type", "size",
-                       "md5sum", "component", "location id", "source package",
-                       "source version", "maintainer", "dbtype", "files id",
-                       "new", "section", "priority", "othercomponents",
+                       "md5sum", "sha1sum", "sha256sum", "component",
+                       "location id", "source package", "source version",
+                       "maintainer", "dbtype", "files id", "new",
+                       "section", "priority", "othercomponents",
                       "pool name", "original component" ]:
                if files[file_entry].has_key(i):
                    d_files[file_entry][i] = files[file_entry][i]
#!/usr/bin/env python
# vim:set et ts=4 sw=4:
# Utility functions
# Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006 James Troup <james@nocrew.org>
@@ -22,7 +23,7 @@

################################################################################

import codecs, commands, email.Header, os, pwd, re, select, socket, shutil, \
-       sys, tempfile, traceback
+       sys, tempfile, traceback, stat
import apt_pkg
import database
from dak_exceptions import *
@@ -55,6 +56,10 @@ default_apt_config = "/etc/dak/apt.conf"

alias_cache = None
key_uid_email_cache = {}

+# (hashname, function, earliest_changes_version)
+known_hashes = [("sha1", apt_pkg.sha1sum, (1, 8)),
+                ("sha256", apt_pkg.sha256sum, (1, 8))]
################################################################################
def open_file(filename, mode='r'):
@@ -96,34 +101,14 @@ def extract_component_from_section(section):

################################################################################

-def parse_changes(filename, signing_rules=0):
-    """Parses a changes file and returns a dictionary where each field is a
-key.  The mandatory first argument is the filename of the .changes
-file.
-
-signing_rules is an optional argument:
-
- o If signing_rules == -1, no signature is required.
- o If signing_rules == 0 (the default), a signature is required.
- o If signing_rules == 1, it turns on the same strict format checking
-   as dpkg-source.
-
-The rules for (signing_rules == 1)-mode are:
-
- o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
-   followed by any PGP header data and must end with a blank line.
-
- o The data section must end with a blank line and must be followed by
-   "-----BEGIN PGP SIGNATURE-----".
-"""
+def parse_deb822(contents, signing_rules=0):
    error = ""
    changes = {}

-    changes_in = open_file(filename)
-    lines = changes_in.readlines()
+    # Split the lines in the input, keeping the linebreaks.
+    lines = contents.splitlines(True)

-    if not lines:
+    if len(lines) == 0:
        raise ParseChangesError, "[Empty changes file]"

    # Reindex by line number so we can easily verify the format of
@@ -189,7 +174,6 @@ The rules for (signing_rules == 1)-mode are:
    if signing_rules == 1 and inside_signature:
        raise InvalidDscError, index

-    changes_in.close()
    changes["filecontents"] = "".join(lines)

    if changes.has_key("source"):
@@ -207,6 +191,212 @@ The rules for (signing_rules == 1)-mode are:

################################################################################

+def parse_changes(filename, signing_rules=0):
+    """Parses a changes file and returns a dictionary where each field is a
+key.  The mandatory first argument is the filename of the .changes
+file.
+
+signing_rules is an optional argument:
+
+ o If signing_rules == -1, no signature is required.
+ o If signing_rules == 0 (the default), a signature is required.
+ o If signing_rules == 1, it turns on the same strict format checking
+   as dpkg-source.
+
+The rules for (signing_rules == 1)-mode are:
+
+ o The PGP header consists of "-----BEGIN PGP SIGNED MESSAGE-----"
+   followed by any PGP header data and must end with a blank line.
+
+ o The data section must end with a blank line and must be followed by
+   "-----BEGIN PGP SIGNATURE-----".
+"""
+    changes_in = open_file(filename)
+    content = changes_in.read()
+    changes_in.close()
+    return parse_deb822(content, signing_rules)
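+
+# parse_deb822 works on content that is already in memory (e.g. the
+# "filecontents" blob stored in the .dak, as used by ensure_hashes below),
+# while parse_changes keeps the old file-based interface on top of it.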
+################################################################################
+
+def hash_key(hashname):
+    return '%ssum' % hashname
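+# e.g. hash_key("sha1") -> "sha1sum", the per-file key used in the files dict.
+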
+################################################################################
+
+def create_hash(where, files, hashname, hashfunc):
+    """create_hash extends the passed files dict with the given hash by
+    iterating over all files on disk and passing them to the hashing
+    function given."""
+
+    rejmsg = []
+    for f in files.keys():
+        try:
+            file_handle = open_file(f)
+        except CantOpenError:
+            rejmsg.append("Could not open file %s for checksumming" % (f))
+            # Move on; file_handle is not usable for this file.
+            continue
+
+        files[f][hash_key(hashname)] = hashfunc(file_handle)
+        file_handle.close()
+    return rejmsg
+################################################################################
+
+def check_hash(where, files, hashname, hashfunc):
+    """check_hash checks the given hash in the files dict against the actual
+    files on disk.  The hash values need to be present consistently in
+    all file entries.  It does not modify its input in any way."""
+
+    rejmsg = []
+    for f in files.keys():
+        try:
+            file_handle = open_file(f)
+        except CantOpenError:
+            # XXX: IS THIS THE BLOODY CASE WHEN THE FILE'S IN THE POOL!?
+            continue
+
+        try:
+            # Check for the hash entry, to not trigger a KeyError.
+            if not files[f].has_key(hash_key(hashname)):
+                rejmsg.append("%s: misses %s checksum in %s" % (f, hashname,
+                    where))
+                continue
+
+            # Actually check the hash for correctness.
+            if hashfunc(file_handle) != files[f][hash_key(hashname)]:
+                rejmsg.append("%s: %s check failed in %s" % (f, hashname,
+                    where))
+        finally:
+            file_handle.close()
+    return rejmsg
+################################################################################
+
+def check_size(where, files):
+    """check_size checks the file sizes in the passed files dict against the
+    files on disk."""
+
+    rejmsg = []
+    for f in files.keys():
+        actual_size = os.stat(f)[stat.ST_SIZE]
+        size = int(files[f]["size"])
+        if size != actual_size:
+            rejmsg.append("%s: actual file size (%s) does not match size (%s) in %s"
+                % (f, actual_size, size, where))
+    return rejmsg
+################################################################################
+
+def check_hash_fields(what, manifest):
+    """check_hash_fields ensures that there are no checksum fields in the
+    given dict that we do not know about."""
+
+    rejmsg = []
+    hashes = map(lambda x: x[0], known_hashes)
+    for field in manifest:
+        if field.startswith("checksums-"):
+            hashname = field.split("-", 1)[1]
+            if hashname not in hashes:
+                rejmsg.append("Unsupported checksum field for %s "\
+                    "in %s" % (hashname, what))
+    return rejmsg
+################################################################################
+
+def _ensure_changes_hash(changes, format, version, files, hashname, hashfunc):
+    if format >= version:
+        # The version should contain the specified hash.
+        func = check_hash
+
+        # Import hashes from the changes
+        rejmsg = parse_checksums(".changes", files, changes, hashname)
+        if len(rejmsg) > 0:
+            return rejmsg
+    else:
+        # We need to calculate the hash because it can't possibly
+        # be in the file.
+        func = create_hash
+    return func(".changes", files, hashname, hashfunc)
+
+# We could add the orig which might be in the pool to the files dict to
+# access the checksums easily.
+def _ensure_dsc_hash(dsc, dsc_files, hashname, hashfunc):
+    """ensure_dsc_hashes' task is to ensure that each and every *present* hash
+    in the dsc is correct, i.e. identical to the changes file and if necessary
+    the pool.  The latter task is delegated to check_hash."""
+
+    rejmsg = []
+    if not dsc.has_key('Checksums-%s' % (hashname,)):
+        return rejmsg
+    # Import hashes from the dsc
+    parse_checksums(".dsc", dsc_files, dsc, hashname)
+    # And check it...
+    rejmsg.extend(check_hash(".dsc", dsc_files, hashname, hashfunc))
+    return rejmsg
+################################################################################
+
+def ensure_hashes(changes, dsc, files, dsc_files):
+    rejmsg = []
+
+    # Make sure we recognise the format of the Files: field in the .changes
+    format = changes.get("format", "0.0").split(".", 1)
+    if len(format) == 2:
+        format = int(format[0]), int(format[1])
+    else:
+        format = int(float(format[0])), 0
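+    # (e.g. "1.8" parses to (1, 8); a bare "2" to (2, 0))
+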
+    # We need to deal with the original changes blob, as the fields we need
+    # might not be in the changes dict serialised into the .dak anymore.
+    orig_changes = parse_deb822(changes['filecontents'])
+
+    # Copy the checksums over to the current changes dict.  This will keep
+    # the existing modifications to it intact.
+    for field in orig_changes:
+        if field.startswith('checksums-'):
+            changes[field] = orig_changes[field]
+
+    # Check for unsupported hashes
+    rejmsg.extend(check_hash_fields(".changes", changes))
+    rejmsg.extend(check_hash_fields(".dsc", dsc))
+
+    # We have to calculate the hash if we have an earlier changes version than
+    # the hash appears in rather than require it exist in the changes file
+    for hashname, hashfunc, version in known_hashes:
+        rejmsg.extend(_ensure_changes_hash(changes, format, version, files,
+            hashname, hashfunc))
+        if "source" in changes["architecture"]:
+            rejmsg.extend(_ensure_dsc_hash(dsc, dsc_files, hashname,
+                hashfunc))
+    return rejmsg
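+
+# Typical call site (cf. dak/process_unchecked.py above):
+#     for m in ensure_hashes(changes, dsc, files, dsc_files):
+#         reject(m)
+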
+def parse_checksums(where, files, manifest, hashname):
+    rejmsg = []
+    field = 'checksums-%s' % hashname
+    if not field in manifest:
+        return rejmsg
+    input = manifest[field]
+    for line in input.split('\n'):
+        if not line:
+            break
+        hash, size, file = line.strip().split(' ')
+        if not files.has_key(file):
+            rejmsg.append("%s: not present in files but in checksums-%s in %s" %
+                (file, hashname, where))
+            # files[file] does not exist; carry on with the next entry.
+            continue
+        if not int(files[file]["size"]) == int(size):
+            rejmsg.append("%s: size differs for files and checksums-%s entry "\
+                "in %s" % (file, hashname, where))
+        files[file][hash_key(hashname)] = hash
+    for f in files.keys():
+        if not files[f].has_key(hash_key(hashname)):
+            rejmsg.append("%s: no entry in checksums-%s in %s" % (f,
+                hashname, where))
+    return rejmsg
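+
+# For reference, a checksums field in a manifest carries one
+# "<hash> <size> <filename>" triple per line, e.g.:
+#
+#   Checksums-Sha1:
+#    <40-hex-digit sha1> <size in bytes> example_1.0-1.dsc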
################################################################################
# Dropped support for 1.4 and ``buggy dchanges 3.4'' (?!) compared to di.pl
def build_file_list(changes, is_a_dsc=0, field="files", hashname="md5sum"):
@@ -70,6 +70,8 @@ CREATE TABLE files (
       md5sum TEXT NOT NULL,
       location INT4 NOT NULL, -- REFERENCES location
       last_used TIMESTAMP,
+      sha1sum TEXT NOT NULL,
+      sha256sum TEXT NOT NULL,
       unique (filename, location)
);
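
-- Pre-existing databases would presumably need a matching migration before
-- these NOT NULL constraints can hold, hypothetically something like:
--   ALTER TABLE files ADD COLUMN sha1sum TEXT;
--   ALTER TABLE files ADD COLUMN sha256sum TEXT;
-- (note that dak/check_archive.py above tolerates NULL/empty values).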