From f2f237c936b5d5727c2b70be85cdacd2903c05b2 Mon Sep 17 00:00:00 2001 From: James Troup <james@nocrew.org> Date: Sat, 3 Apr 2004 02:49:46 +0000 Subject: [PATCH] 2004-04-03 James Troup <james@nocrew.org> * debian/control (Depends): add python2.1-email | python (>= 2.2) needed for new utils.rfc2047_encode() function. * utils.py (re_parse_maintainer): allow whitespace inside the email address. (Error): new exception base class. (ParseMaintError): new exception class. (force_to_utf8): new function. (rfc2047_encode): likewise. (fix_maintainer): rework. use force_to_utf8() to force name and rfc822 return values to always use UTF-8. use rfc2047_encode() to return an rfc2047 value. Validate the address to catch missing email addresses and (some) broken ones. * katie.py (nmu_p.is_an_nmu): adapt for new utils.fix_maintainer() by adopting foo2047 return value. (Katie.dump_vars): add changedby2047 and maintainer2047 as mandatory changes fields. Promote changes and maintainer822 to mandatory fields. (Katie.update_subst): default maintainer2047 rather than maintainer822. Use foo2047 rather than foo822 when setting __MAINTAINER_TO__ or __MAINTAINER_FROM__. * jennifer (check_changes): set default changes["maintainer2047"] and changes["changedby2047"] values rather than their 822 equivalents. Makes changes["changes"] a mandatory field. Adapt to new utils.fix_maintainer() - reject on exception and adopt foo2047 return value. (check_dsc): if a mandatory field is missing don't do any further checks and as a result reduce paranoia about dsc[var] existence. Validate the maintainer field by calling new utils.fix_maintainer(). * ashley (main): add changedby2047 and maintainer2047 to mandatory changes fields. Promote maintainer822 to a mandatory changes field. add "pool name" to files fields. * test/006/test.py: new file - tests for new utils.fix_maintainer(). 
--- TODO | 12 +++++ ashley | 11 +++-- debian/control | 2 +- jennifer | 52 ++++++++++++++------ katie.py | 39 ++++++++------- test/006/test.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++ utils.py | 109 ++++++++++++++++++++++++++++++++++------- 7 files changed, 292 insertions(+), 56 deletions(-) create mode 100755 test/006/test.py diff --git a/TODO b/TODO index 6d94b1d4..936baa08 100644 --- a/TODO +++ b/TODO @@ -27,6 +27,18 @@ queue/approved Others ------ + o need to decide on whether we're trying for most errors at once.. if + so (probably) then make sure code doesn't assume variables exist and + either way do something about checking error code of check_dsc and + later functions so we skip later checks if they're bailing. + + o the .katie stuff is fundamentally braindamaged, it's not versioned + so there's no way to change the format, yay me. need to fix. + probably by putting a version var as the first thing and checking + that.. auto-upgrade at least from original format would be good. + might also be a good idea to put everything in one big dict after + that? + o reject sparc64 binaries in a non '*64*' package. 
o katie.py(source_exists): a) we take arguments as parameters that diff --git a/ashley b/ashley index 194b715d..4bf9a3cd 100755 --- a/ashley +++ b/ashley @@ -2,7 +2,7 @@ # Dump variables from a .katie file to stdout # Copyright (C) 2001, 2002, 2004 James Troup -# $Id: ashley,v 1.9 2004-04-01 17:14:25 troup Exp $ +# $Id: ashley,v 1.10 2004-04-03 02:49:46 troup Exp $ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -67,7 +67,10 @@ def main(): changes = k.pkg.changes; print " Changes:"; # Mandatory changes fields - for i in [ "source", "version", "maintainer", "urgency", "changedby822", "changedbyname", "maintainername", "maintaineremail", "fingerprint" ]: + for i in [ "source", "version", "maintainer", "urgency", "changedby822", + "changedby2047", "changedbyname", "maintainer822", + "maintainer2047", "maintainername", "maintaineremail", + "fingerprint", "changes" ]: print " %s: %s" % (i.capitalize(), changes[i]); del changes[i]; # Mandatory changes lists @@ -75,7 +78,7 @@ def main(): print " %s: %s" % (i.capitalize(), " ".join(changes[i].keys())); del changes[i]; # Optional changes fields - for i in [ "changed-by", "maintainer822", "filecontents", "format" ]: + for i in [ "changed-by", "filecontents", "format" ]: if changes.has_key(i): print " %s: %s" % (i.capitalize(), changes[i]); del changes[i]; @@ -101,7 +104,7 @@ def main(): for i in [ "package", "version", "architecture", "type", "size", "md5sum", "component", "location id", "source package", "source version", "maintainer", "dbtype", "files id", - "new", "section", "priority" ]: + "new", "section", "priority", "pool name" ]: if files[file].has_key(i): print " %s: %s" % (i.capitalize(), files[file][i]); del files[file][i]; diff --git a/debian/control b/debian/control index a12898a2..05707adc 100644 --- a/debian/control +++ b/debian/control @@ -7,7 +7,7 @@ Standards-Version: 3.5.6.0 Package: katie Architecture: any 
-Depends: ${python:Depends}, python-pygresql, python-apt, apt-utils, gnupg (>= 1.0.6-1), ${shlibs:Depends} +Depends: ${python:Depends}, python-pygresql, python2.1-email | python (>= 2.2), python-apt, apt-utils, gnupg (>= 1.0.6-1), ${shlibs:Depends} Suggests: lintian, linda, less, binutils-multiarch, symlinks, postgresql (>= 7.1.0), dsync Description: Debian's archive maintenance scripts This is a collection of archive maintenance scripts used by the diff --git a/jennifer b/jennifer index c451a053..14b5b680 100755 --- a/jennifer +++ b/jennifer @@ -2,7 +2,7 @@ # Checks Debian packages from Incoming # Copyright (C) 2000, 2001, 2002, 2003, 2004 James Troup -# $Id: jennifer,v 1.46 2004-04-01 17:14:25 troup Exp $ +# $Id: jennifer,v 1.47 2004-04-03 02:49:46 troup Exp $ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -47,7 +47,7 @@ re_strip_revision = re.compile(r"-([^-]+)$"); ################################################################################ # Globals -jennifer_version = "$Revision: 1.46 $"; +jennifer_version = "$Revision: 1.47 $"; Cnf = None; Options = None; @@ -182,9 +182,9 @@ def clean_holding(): def check_changes(): filename = pkg.changes_file; - # Default in case we bail out - changes["maintainer822"] = Cnf["Dinstall::MyEmailAddress"]; - changes["changedby822"] = Cnf["Dinstall::MyEmailAddress"]; + # Defaults in case we bail out + changes["maintainer2047"] = Cnf["Dinstall::MyEmailAddress"]; + changes["changedby2047"] = Cnf["Dinstall::MyEmailAddress"]; changes["architecture"] = {}; # Parse the .changes field into a dictionary @@ -207,7 +207,8 @@ def check_changes(): return 0; # Check for mandatory fields - for i in ("source", "binary", "architecture", "version", "distribution", "maintainer", "files"): + for i in ("source", "binary", "architecture", "version", "distribution", + "maintainer", "files", "changes"): if not changes.has_key(i): reject("%s: Missing 
mandatory field `%s'." % (filename, i)); return 0 # Avoid errors during later tests @@ -221,11 +222,23 @@ def check_changes(): for j in o.split(): changes[i][j] = 1 - # Fix the Maintainer: field to be RFC822 compatible - (changes["maintainer822"], changes["maintainername"], changes["maintaineremail"]) = utils.fix_maintainer (changes["maintainer"]) - - # Fix the Changed-By: field to be RFC822 compatible; if it exists. - (changes["changedby822"], changes["changedbyname"], changes["changedbyemail"]) = utils.fix_maintainer(changes.get("changed-by","")); + # Fix the Maintainer: field to be RFC822/2047 compatible + try: + (changes["maintainer822"], changes["maintainer2047"], + changes["maintainername"], changes["maintaineremail"]) = \ + utils.fix_maintainer (changes["maintainer"]); + except utils.ParseMaintError, msg: + reject("%s: Maintainer field ('%s') failed to parse: %s" \ + % (filename, changes["maintainer"], msg)); + + # ...likewise for the Changed-By: field if it exists. + try: + (changes["changedby822"], changes["changedby2047"], + changes["changedbyname"], changes["changedbyemail"]) = \ + utils.fix_maintainer (changes.get("changed-by", "")); + except utils.ParseMaintError, msg: + reject("%s: Changed-By field ('%s') failed to parse: %s" \ + % (filename, changes["changed-by"], msg)); # Ensure all the values in Closes: are numbers if changes.has_key("closes"): @@ -611,11 +624,12 @@ def check_dsc(): for i in ("format", "source", "version", "binary", "maintainer", "architecture", "files"): if not dsc.has_key(i): reject("%s: missing mandatory field `%s'." % (dsc_filename, i)); + return; # Validate the source and version fields - if dsc.has_key("source") and not re_valid_pkg_name.match(dsc["source"]): + if not re_valid_pkg_name.match(dsc["source"]): reject("%s: invalid source name '%s'." 
% (dsc_filename, dsc["source"])); - if dsc.has_key("version") and not re_valid_version.match(dsc["version"]): + if not re_valid_version.match(dsc["version"]): reject("%s: invalid version number '%s'." % (dsc_filename, dsc["version"])); # Bumping the version number of the .dsc breaks extraction by stable's @@ -623,6 +637,13 @@ def check_dsc(): if dsc["format"] != "1.0": reject("%s: incompatible 'Format' version produced by a broken version of dpkg-dev 1.9.1{3,4}." % (dsc_filename)); + # Validate the Maintainer field + try: + utils.fix_maintainer (dsc["maintainer"]); + except utils.ParseMaintError, msg: + reject("%s: Maintainer field ('%s') failed to parse: %s" \ + % (dsc_filename, changes["changed-by"], msg)); + # Validate the build-depends field(s) for field_name in [ "build-depends", "build-depends-indep" ]: field = dsc.get(field_name); @@ -639,7 +660,7 @@ def check_dsc(): pass; # Ensure the version number in the .dsc matches the version number in the .changes - epochless_dsc_version = utils.re_no_epoch.sub('', dsc.get("version")); + epochless_dsc_version = utils.re_no_epoch.sub('', dsc["version"]); changes_version = files[dsc_filename]["version"]; if epochless_dsc_version != files[dsc_filename]["version"]: reject("version ('%s') in .dsc does not match version ('%s') in .changes." 
% (epochless_dsc_version, changes_version)); @@ -701,8 +722,7 @@ def get_changelog_versions(source_dir): type = m.group(3); if type == "orig.tar.gz" and pkg.orig_tar_gz: continue; - else: - dest = os.path.join(os.getcwd(), f); + dest = os.path.join(os.getcwd(), f); os.symlink(src, dest); # If the orig.tar.gz is not a part of the upload, create a symlink to the diff --git a/katie.py b/katie.py index 308806cc..d1919044 100644 --- a/katie.py +++ b/katie.py @@ -2,7 +2,7 @@ # Utility functions for katie # Copyright (C) 2001, 2002, 2003, 2004 James Troup -# $Id: katie.py,v 1.45 2004-04-01 17:14:25 troup Exp $ +# $Id: katie.py,v 1.46 2004-04-03 02:49:46 troup Exp $ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -66,7 +66,9 @@ class nmu_p: changes = pkg.changes; dsc = pkg.dsc; - (dsc_rfc822, dsc_name, dsc_email) = utils.fix_maintainer (dsc.get("maintainer",Cnf["Dinstall::MyEmailAddress"]).lower()); + i = utils.fix_maintainer (dsc.get("maintainer", + Cnf["Dinstall::MyEmailAddress"]).lower()); + (dsc_rfc822, dsc_rfc2047, dsc_name, dsc_email) = i; # changes["changedbyname"] == dsc_name is probably never true, but better safe than sorry if dsc_name == changes["maintainername"].lower() and \ (changes["changedby822"] == "" or changes["changedbyname"].lower() == dsc_name): @@ -76,7 +78,7 @@ class nmu_p: uploaders = dsc["uploaders"].lower().split(","); uploadernames = {}; for i in uploaders: - (rfc822, name, email) = utils.fix_maintainer (i.strip()); + (rfc822, rfc2047, name, email) = utils.fix_maintainer (i.strip()); uploadernames[name] = ""; if uploadernames.has_key(changes["changedbyname"].lower()): return 0; @@ -167,19 +169,19 @@ class Katie: d_files[file][i] = files[file][i]; ## changes # Mandatory changes fields - for i in [ "distribution", "source", "architecture", "version", "maintainer", - "urgency", "fingerprint", "changedby822", "changedbyname", - "maintainername", 
"maintaineremail", "closes" ]: + for i in [ "distribution", "source", "architecture", "version", + "maintainer", "urgency", "fingerprint", "changedby822", + "changedby2047", "changedbyname", "maintainer822", + "maintainer2047", "maintainername", "maintaineremail", + "closes", "changes" ]: d_changes[i] = changes[i]; # Optional changes fields - # FIXME: changes should be mandatory - for i in [ "changed-by", "maintainer822", "filecontents", "format", - "changes", "lisa note" ]: + for i in [ "changed-by", "filecontents", "format", "lisa note" ]: if changes.has_key(i): d_changes[i] = changes[i]; ## dsc - for i in [ "source", "version", "maintainer", "fingerprint", "uploaders", - "bts changelog" ]: + for i in [ "source", "version", "maintainer", "fingerprint", + "uploaders", "bts changelog" ]: if dsc.has_key(i): d_dsc[i] = dsc[i]; ## dsc_files @@ -208,9 +210,9 @@ class Katie: # If jennifer crashed out in the right place, architecture may still be a string. if not changes.has_key("architecture") or not isinstance(changes["architecture"], DictType): changes["architecture"] = { "Unknown" : "" }; - # and maintainer822 may not exist. - if not changes.has_key("maintainer822"): - changes["maintainer822"] = self.Cnf["Dinstall::MyEmailAddress"]; + # and maintainer2047 may not exist. + if not changes.has_key("maintainer2047"): + changes["maintainer2047"] = self.Cnf["Dinstall::MyEmailAddress"]; Subst["__ARCHITECTURE__"] = " ".join(changes["architecture"].keys()); Subst["__CHANGES_FILENAME__"] = os.path.basename(self.pkg.changes_file); @@ -218,12 +220,13 @@ class Katie: # For source uploads the Changed-By field wins; otherwise Maintainer wins. 
if changes["architecture"].has_key("source") and changes["changedby822"] != "" and (changes["changedby822"] != changes["maintainer822"]): - Subst["__MAINTAINER_FROM__"] = changes["changedby822"]; - Subst["__MAINTAINER_TO__"] = changes["changedby822"] + ", " + changes["maintainer822"]; + Subst["__MAINTAINER_FROM__"] = changes["changedby2047"]; + Subst["__MAINTAINER_TO__"] = "%s, %s" % (changes["changedby2047"], + changes["maintainer2047"]); Subst["__MAINTAINER__"] = changes.get("changed-by", "Unknown"); else: - Subst["__MAINTAINER_FROM__"] = changes["maintainer822"]; - Subst["__MAINTAINER_TO__"] = changes["maintainer822"]; + Subst["__MAINTAINER_FROM__"] = changes["maintainer2047"]; + Subst["__MAINTAINER_TO__"] = changes["maintainer2047"]; Subst["__MAINTAINER__"] = changes.get("maintainer", "Unknown"); if self.Cnf.has_key("Dinstall::TrackingServer") and changes.has_key("source"): Subst["__MAINTAINER_TO__"] += "\nBcc: %s@%s" % (changes["source"], self.Cnf["Dinstall::TrackingServer"]) diff --git a/test/006/test.py b/test/006/test.py new file mode 100755 index 00000000..99e1e1bf --- /dev/null +++ b/test/006/test.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Test utils.fix_maintainer() +# Copyright (C) 2004 James Troup +# $Id: test.py,v 1.1 2004-04-03 02:49:54 troup Exp $ + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+ +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +################################################################################ + +import os, sys + +sys.path.append(os.path.abspath('../../')); + +import utils + +################################################################################ + +def fail(message): + sys.stderr.write("%s\n" % (message)); + sys.exit(1); + +################################################################################ + +def check_valid(s, xa, xb, xc, xd): + (a, b, c, d) = utils.fix_maintainer(s) + if a != xa: + fail("rfc822_maint: %s (returned) != %s (expected [From: '%s']" % (a, xa, s)); + if b != xb: + fail("rfc2047_maint: %s (returned) != %s (expected [From: '%s']" % (b, xb, s)); + if c != xc: + fail("name: %s (returned) != %s (expected [From: '%s']" % (c, xc, s)); + if d != xd: + fail("email: %s (returned) != %s (expected [From: '%s']" % (d, xd, s)); + +def check_invalid(s): + try: + utils.fix_maintainer(s); + fail("%s was parsed successfully but is expected to be invalid." % (s)); + except utils.ParseMaintError, unused: + pass; + +def main (): + # Check Valid UTF-8 maintainer field + s = "Noèl Köthe " + xa = "Noèl Köthe " + xb = "=?utf-8?b?Tm/DqGwgS8O2dGhl?= " + xc = "Noèl Köthe" + xd = "noel@debian.org" + check_valid(s, xa, xb, xc, xd); + + # Check valid ISO-8859-1 maintainer field + s = "Noèl Köthe " + xa = "Noèl Köthe " + xb = "=?iso-8859-1?q?No=E8l_K=F6the?= " + xc = "Noèl Köthe" + xd = "noel@debian.org" + check_valid(s, xa, xb, xc, xd); + + # Check valid ASCII maintainer field + s = "James Troup " + xa = "James Troup " + xb = "James Troup " + xc = "James Troup" + xd = "james@nocrew.org" + check_valid(s, xa, xb, xc, xd); + + # Check "Debian vs RFC822" fixup of names with '.' or ',' in them + s = "James J. Troup " + xa = "james@nocrew.org (James J. 
Troup)" + xb = "james@nocrew.org (James J. Troup)" + xc = "James J. Troup" + xd = "james@nocrew.org" + check_valid(s, xa, xb, xc, xd); + s = "James J, Troup " + xa = "james@nocrew.org (James J, Troup)" + xb = "james@nocrew.org (James J, Troup)" + xc = "James J, Troup" + xd = "james@nocrew.org" + check_valid(s, xa, xb, xc, xd); + + # Check just-email form + s = "james@nocrew.org" + xa = " " + xb = " " + xc = "" + xd = "james@nocrew.org" + check_valid(s, xa, xb, xc, xd); + + # Check Krazy quoted-string local part email address + s = "Cris van Pelt <\"Cris van Pelt\"@tribe.eu.org>" + xa = "Cris van Pelt <\"Cris van Pelt\"@tribe.eu.org>" + xb = "Cris van Pelt <\"Cris van Pelt\"@tribe.eu.org>" + xc = "Cris van Pelt" + xd = "\"Cris van Pelt\"@tribe.eu.org" + check_valid(s, xa, xb, xc, xd); + + # Check empty string + s = xa = xb = xc = xd = ""; + check_valid(s, xa, xb, xc, xd); + + # Check for missing email address + check_invalid("James Troup"); + # Check for invalid email address + check_invalid("James Troup -# $Id: utils.py,v 1.65 2004-04-01 17:13:10 troup Exp $ +# $Id: utils.py,v 1.66 2004-04-03 02:49:46 troup Exp $ ################################################################################ @@ -22,7 +22,9 @@ ################################################################################ -import commands, os, pwd, re, select, socket, shutil, string, sys, tempfile, traceback; +import commands, encodings.ascii, encodings.utf_8, encodings.latin_1, \ + email.Header, os, pwd, re, select, socket, shutil, string, sys, \ + tempfile, traceback; import apt_pkg; import db_access; @@ -40,7 +42,7 @@ re_single_line_field = re.compile(r"^(\S*)\s*:\s*(.*)"); re_multi_line_field = re.compile(r"^\s(.*)"); re_taint_free = re.compile(r"^[-+~\.\w]+$"); -re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\> \t]+)\>"); +re_parse_maintainer = re.compile(r"^\s*(\S.*\S)\s*\<([^\>]+)\>"); changes_parse_error_exc = "Can't parse line in .changes file"; invalid_dsc_format_exc = "Invalid 
.dsc file"; @@ -58,6 +60,23 @@ default_apt_config = "/etc/katie/apt.conf"; ################################################################################ +class Error(Exception): + """Base class for exceptions in this module.""" + pass; + +class ParseMaintError(Error): + """Exception raised for errors in parsing a maintainer field. + + Attributes: + message -- explanation of the error + """ + + def __init__(self, message): + self.args = message,; + self.message = message; + +################################################################################ + def open_file(filename, mode='r'): try: f = open(filename, mode); @@ -262,24 +281,80 @@ def build_file_list(changes, is_a_dsc=0): ################################################################################ -# Fix the `Maintainer:' field to be an RFC822 compatible address. -# cf. Debian Policy Manual (D.2.4) -# -# 06:28| 'The standard sucks, but my tool is supposed to -# interoperate with it. I know - I'll fix the suckage -# and make things incompatible!' +def force_to_utf8(s): + """Forces a string to UTF-8. If the string isn't already UTF-8, +it's assumed to be ISO-8859-1.""" + try: + unicode(s, 'utf-8'); + return s; + except UnicodeError: + latin1_s = unicode(s,'iso8859-1'); + return latin1_s.encode('utf-8'); + +def rfc2047_encode(s): + """Encodes a (header) string per RFC2047 if necessary. If the +string is neither ASCII nor UTF-8, it's assumed to be ISO-8859-1.""" + try: + encodings.ascii.Codec().decode(s); + return s; + except UnicodeError: + pass; + try: + encodings.utf_8.Codec().decode(s); + h = email.Header.Header(s, 'utf-8', 998); + return str(h); + except UnicodeError: + h = email.Header.Header(s, 'iso-8859-1', 998); + return str(h); + +################################################################################ + +# 'The standard sucks, but my tool is supposed to interoperate +# with it. I know - I'll fix the suckage and make things +# incompatible!' 
def fix_maintainer (maintainer): - m = re_parse_maintainer.match(maintainer); - rfc822 = maintainer; - name = ""; - email = ""; - if m and len(m.groups()) == 2: + """Parses a Maintainer or Changed-By field and returns: + (1) an RFC822 compatible version, + (2) an RFC2047 compatible version, + (3) the name + (4) the email + +The name is forced to UTF-8 for both (1) and (3). If the name field +contains '.' or ',' (as allowed by Debian policy), (1) and (2) are +switched to 'email (name)' format.""" + maintainer = maintainer.strip() + if not maintainer: + return ('', '', '', ''); + + if maintainer.find("<") == -1 or (maintainer[0] == "<" and \ + maintainer[-1:] == ">"): + email = maintainer; + name = ""; + else: + m = re_parse_maintainer.match(maintainer); + if not m: + raise ParseMaintError, "Doesn't parse as a valid Maintainer field." name = m.group(1); email = m.group(2); - if name.find(',') != -1 or name.find('.') != -1: - rfc822 = "%s (%s)" % (email, name); - return (rfc822, name, email) + + # Get an RFC2047 compliant version of the name + rfc2047_name = rfc2047_encode(name); + + # Force the name to be UTF-8 + name = force_to_utf8(name); + + if name.find(',') != -1 or name.find('.') != -1: + rfc822_maint = "%s (%s)" % (email, name); + rfc2047_maint = "%s (%s)" % (email, rfc2047_name); + else: + rfc822_maint = "%s <%s>" % (name, email); + rfc2047_maint = "%s <%s>" % (rfc2047_name, email); + + if email.find("@") == -1 and email.find("buildd_") != 0: + raise ParseMaintError, "No @ found in email address part." + + return (rfc822_maint, rfc2047_maint, name, email); ################################################################################ -- GitLab