提交 b18683b5 编写于 作者: N Niels Thykier

auto-decruft: Batch check source-less cruft

Add a ReverseDependencyChecker class for bulk testing breakage in
reverse dependencies and use it in the auto-decrufter.

At this point, disable the NBS removal - it will be re-added in the
next commit.
Signed-off-by: NNiels Thykier <niels@thykier.net>
上级 3b5befe5
......@@ -40,7 +40,7 @@ from daklib.config import Config
from daklib.dbconn import *
from daklib import utils
from daklib.cruft import *
from daklib.rm import remove
from daklib.rm import remove, ReverseDependencyChecker
################################################################################
......@@ -55,7 +55,7 @@ Check for obsolete or duplicated packages.
################################################################################
def remove_sourceless_cruft(suite_name, suite_id, session, dryrun):
def remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug):
"""Remove binaries without a source
@type suite_name: string
......@@ -69,31 +69,96 @@ def remove_sourceless_cruft(suite_name, suite_id, session, dryrun):
@type dryrun: bool
@param dryrun: If True, just print the actions rather than actually doing them
@type debug: bool
@param debug: If True, print some extra information
"""""
global Options
rows = query_without_source(suite_id, session)
arch_all_id = get_architecture('all', session=session)
discarded_removal = set()
message = '[auto-cruft] no longer built from source, no reverse dependencies'
for row in rows:
package = row[0]
if utils.check_reverse_depends([package], suite_name, [], session, cruft=True, quiet=True):
continue
all_packages = dict((row[0], None) for row in rows)
if not all_packages:
if debug:
print "N: Found no candidates"
return
if debug:
print "N: Considering to remove %s" % str(sorted(all_packages.iterkeys()))
if debug:
print "N: Compiling ReverseDependencyChecker (RDC) - please hold ..."
rdc = ReverseDependencyChecker(session, suite_name)
if debug:
print "N: Computing initial breakage..."
breakage = rdc.check_reverse_depends(all_packages)
while breakage:
by_breakers = [(len(breakage[x]), x, breakage[x]) for x in breakage]
by_breakers.sort(reverse=True)
if debug:
print "N: - Removal would break %s (package, architecture)-pairs" % (len(breakage))
print "N: - full breakage:"
for _, breaker, broken in by_breakers:
bname = "%s/%s" % breaker
broken_str = ", ".join("%s/%s" % b for b in sorted(broken))
print "N: * %s => %s" % (bname, broken_str)
_, worst_package_arch, worst_breakage = by_breakers.pop(0)
averted_breakage = set(worst_breakage)
del all_packages[worst_package_arch[0]]
discarded_removal.add(worst_package_arch[0])
if debug:
print "N: - skip removal of %s (due to %s)" % (worst_package_arch[0], sorted(averted_breakage))
for _, package_arch, breakage in by_breakers:
package = package_arch[0]
if breakage <= averted_breakage:
# We already avoided this break
continue
if package in discarded_removal:
averted_breakage |= breakage
continue
if debug:
print "N: - skip removal of %s (due to %s)" % (
package, str(sorted(breakage - averted_breakage)))
discarded_removal.add(package)
averted_breakage |= breakage
del all_packages[package]
if not all_packages:
if debug:
print "N: Nothing left to remove"
return
if debug:
print "N: Now considering to remove %s" % str(sorted(all_packages.iterkeys()))
breakage = rdc.check_reverse_depends(all_packages)
if debug:
print "N: Removal looks good"
if dryrun:
# Embed the -R just in case someone wants to run it manually later
print 'Would do: dak rm -m "%s" -s %s -a all -p -R -b %s' % \
(message, suite_name, " ".join(sorted(all_packages)))
else:
params = {
arch_all_id: arch_all_id,
all_packages: tuple(all_packages),
suite_id: suite_id
}
q = session.execute("""
SELECT b.package, b.version, a.arch_string, b.id
FROM binaries b
JOIN bin_associations ba ON b.id = ba.bin
JOIN architecture a ON b.architecture = a.id
JOIN suite su ON ba.suite = su.id
WHERE a.id = :arch_all_id AND b.package IN :all_packages AND su.id = :suite_id
""", params)
remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter")
if dryrun:
# Embed the -R just in case someone wants to run it manually later
print 'Would do: dak rm -m "%s" -s %s -a all -p -R -b %s' % \
(message, suite_name, package)
else:
q = session.execute("""
SELECT b.package, b.version, a.arch_string, b.id
FROM binaries b
JOIN bin_associations ba ON b.id = ba.bin
JOIN architecture a ON b.architecture = a.id
JOIN suite su ON ba.suite = su.id
WHERE a.id = :arch_all_id AND b.package = :package AND su.id = :suite_id
""", {arch_all_id: arch_all_id, package: package, suite_id: suite_id})
remove(session, message, [suite_name], list(q), partial=True, whoami="DAK's auto-decrufter")
def removeNBS(suite_name, suite_id, session, dryrun):
......@@ -154,8 +219,9 @@ def main ():
Arguments = [('h',"help","Auto-Decruft::Options::Help"),
('n',"dry-run","Auto-Decruft::Options::Dry-Run"),
('d',"debug","Auto-Decruft::Options::Debug"),
('s',"suite","Auto-Decruft::Options::Suite","HasArg")]
for i in ["help", "Dry-Run"]:
for i in ["help", "Dry-Run", "Debug"]:
if not cnf.has_key("Auto-Decruft::Options::%s" % (i)):
cnf["Auto-Decruft::Options::%s" % (i)] = ""
......@@ -167,9 +233,12 @@ def main ():
if Options["Help"]:
usage()
debug = False
dryrun = False
if Options["Dry-Run"]:
dryrun = True
if Options["Debug"]:
debug = True
session = DBConn().session()
......@@ -180,8 +249,8 @@ def main ():
suite_id = suite.suite_id
suite_name = suite.suite_name.lower()
remove_sourceless_cruft(suite_name, suite_id, session, dryrun)
removeNBS(suite_name, suite_id, session, dryrun)
remove_sourceless_cruft(suite_name, suite_id, session, dryrun, debug)
#removeNBS(suite_name, suite_id, session, dryrun)
################################################################################
......
......@@ -32,6 +32,8 @@
import commands
import apt_pkg
from re import sub
from collections import defaultdict
from regexes import re_build_dep_arch
from daklib.dbconn import *
from daklib import utils
......@@ -41,6 +43,247 @@ import debianbts as bts
################################################################################
class ReverseDependencyChecker(object):
"""A bulk tester for reverse dependency checks
This class is similar to the check_reverse_depends method from "utils". However,
it is primarily focused on facilitating bulk testing of reverse dependencies.
It caches the state of the suite and then uses that as basis for answering queries.
This saves a significant amount of time if multiple reverse dependency checks are
required.
"""
def __init__(self, session, suite):
"""Creates a new ReverseDependencyChecker instance
This will spend a significant amount of time caching data.
@type session: SQLA Session
@param session: The database session in use
@type suite: str
@param suite: The name of the suite that is used as basis for removal tests.
"""
self._session = session
dbsuite = get_suite(suite, session)
suite_archs2id = dict((x.arch_string, x.arch_id) for x in get_suite_architectures(suite))
package_dependencies, arch_providors_of, arch_provided_by = self._load_package_information(session,
dbsuite.suite_id,
suite_archs2id)
self._package_dependencies = package_dependencies
self._arch_providors_of = arch_providors_of
self._arch_provided_by = arch_provided_by
self._archs_in_suite = set(suite_archs2id)
@staticmethod
def _load_package_information(session, suite_id, suite_archs2id):
package_dependencies = defaultdict(lambda: defaultdict(set))
arch_providors_of = defaultdict(lambda: defaultdict(set))
arch_provided_by = defaultdict(lambda: defaultdict(set))
source_deps = defaultdict(set)
metakey_d = get_or_set_metadatakey("Depends", session)
metakey_p = get_or_set_metadatakey("Provides", session)
params = {
'suite_id': suite_id,
'arch_all_id': suite_archs2id['all'],
'metakey_d_id': metakey_d.key_id,
'metakey_p_id': metakey_p.key_id,
}
all_arches = set(suite_archs2id)
all_arches.discard('source')
package_dependencies['source'] = source_deps
for architecture in all_arches:
deps = defaultdict(set)
providers_of = defaultdict(set)
provided_by = defaultdict(set)
arch_providors_of[architecture] = providers_of
arch_provided_by[architecture] = provided_by
package_dependencies[architecture] = deps
params['arch_id'] = suite_archs2id[architecture]
statement = '''
SELECT b.package,
(SELECT bmd.value FROM binaries_metadata bmd WHERE bmd.bin_id = b.id AND bmd.key_id = :metakey_d_id) AS depends,
(SELECT bmp.value FROM binaries_metadata bmp WHERE bmp.bin_id = b.id AND bmp.key_id = :metakey_p_id) AS provides
FROM binaries b
JOIN bin_associations ba ON b.id = ba.bin AND ba.suite = :suite_id
WHERE b.architecture = :arch_id OR b.architecture = :arch_all_id'''
query = session.query('package', 'depends', 'provides'). \
from_statement(statement).params(params)
for package, depends, provides in query:
if depends is not None:
try:
parsed_dep = []
for dep in apt_pkg.parse_depends(depends):
parsed_dep.append(frozenset(d[0] for d in dep))
deps[package].update(parsed_dep)
except ValueError as e:
print "Error for package %s: %s" % (package, e)
# Maintain a counter for each virtual package. If a
# Provides: exists, set the counter to 0 and count all
# provides by a package not in the list for removal.
# If the counter stays 0 at the end, we know that only
# the to-be-removed packages provided this virtual
# package.
if provides is not None:
for virtual_pkg in provides.split(","):
virtual_pkg = virtual_pkg.strip()
if virtual_pkg == package:
continue
provided_by[virtual_pkg].add(package)
providers_of[package].add(virtual_pkg)
# Check source dependencies (Build-Depends and Build-Depends-Indep)
metakey_bd = get_or_set_metadatakey("Build-Depends", session)
metakey_bdi = get_or_set_metadatakey("Build-Depends-Indep", session)
params = {
'suite_id': suite_id,
'metakey_ids': (metakey_bd.key_id, metakey_bdi.key_id),
}
statement = '''
SELECT s.source, string_agg(sm.value, ', ') as build_dep
FROM source s
JOIN source_metadata sm ON s.id = sm.src_id
WHERE s.id in
(SELECT source FROM src_associations
WHERE suite = :suite_id)
AND sm.key_id in :metakey_ids
GROUP BY s.id, s.source'''
query = session.query('source', 'build_dep').from_statement(statement). \
params(params)
for source, build_dep in query:
if build_dep is not None:
# Remove [arch] information since we want to see breakage on all arches
build_dep = re_build_dep_arch.sub("", build_dep)
try:
parsed_dep = []
for dep in apt_pkg.parse_src_depends(build_dep):
parsed_dep.append(frozenset(d[0] for d in dep))
source_deps[source].update(parsed_dep)
except ValueError as e:
print "Error for package %s: %s" % (source, e)
return package_dependencies, arch_providors_of, arch_provided_by
def check_reverse_depends(self, removal_requests):
"""Bulk check reverse dependencies
Example:
removal_request = {
"eclipse-rcp": None, # means ALL architectures (incl. source)
"eclipse": None, # means ALL architectures (incl. source)
"lintian": ["source", "all"], # Only these two "architectures".
}
obj.check_reverse_depends(removal_request)
@type removal_requests: dict (or a list of tuples)
@param removal_requests: A dictionary mapping a package name to a list of architectures. The list of
architectures decides from which the package will be removed - if the list is empty the package will
be removed on ALL architectures in the suite (including "source").
@rtype: dict
@return: A mapping of "removed package" (as a "(pkg, arch)"-tuple) to a set of broken
broken packages (also as "(pkg, arch)"-tuple). Note that the architecture values
in these tuples /can/ be "source" to reflect a breakage in build-dependencies.
"""
archs_in_suite = self._archs_in_suite
removals_by_arch = defaultdict(set)
affected_virtual_by_arch = defaultdict(set)
package_dependencies = self._package_dependencies
arch_providors_of = self._arch_providors_of
arch_provided_by = self._arch_provided_by
arch_provides2removal = defaultdict(lambda: defaultdict(set))
dep_problems = defaultdict(set)
src_deps = package_dependencies['source']
src_removals = set()
arch_all_removals = set()
if isinstance(removal_requests, dict):
removal_requests = removal_requests.iteritems()
for pkg, arch_list in removal_requests:
if not arch_list:
arch_list = archs_in_suite
for arch in arch_list:
if arch == 'source':
src_removals.add(pkg)
continue
if arch == 'all':
arch_all_removals.add(pkg)
continue
removals_by_arch[arch].add(pkg)
if pkg in arch_providors_of[arch]:
affected_virtual_by_arch[arch].add(pkg)
if arch_all_removals:
for arch in archs_in_suite:
if arch in ('all', 'source'):
continue
removals_by_arch[arch].update(arch_all_removals)
for pkg in arch_all_removals:
if pkg in arch_providors_of[arch]:
affected_virtual_by_arch[arch].add(pkg)
if not removals_by_arch:
# Nothing to remove => no problems
return dep_problems
for arch, removed_providers in affected_virtual_by_arch.iteritems():
provides2removal = arch_provides2removal[arch]
removals = removals_by_arch[arch]
for virtual_pkg, virtual_providers in arch_provided_by[arch].iteritems():
v = virtual_providers & removed_providers
if len(v) == len(virtual_providers):
# We removed all the providers of virtual_pkg
removals.add(virtual_pkg)
# Pick one to take the blame for the removal
# - we sort for determinism, optimally we would prefer to blame the same package
# to minimise the number of blamed packages.
provides2removal[virtual_pkg] = sorted(v)[0]
for arch, removals in removals_by_arch.iteritems():
deps = package_dependencies[arch]
provides2removal = arch_provides2removal[arch]
# Check binary dependencies (Depends)
for package, dependencies in deps.iteritems():
if package in removals:
continue
for clause in dependencies:
if not (clause <= removals):
# Something probably still satisfies this relation
continue
# whoops, we seemed to have removed all packages that could possibly satisfy
# this relation. Lets blame something for it
for dep_package in clause:
removal = dep_package
if dep_package in provides2removal:
removal = provides2removal[dep_package]
dep_problems[(removal, arch)].add((package, arch))
for source, build_dependencies in src_deps.iteritems():
if source in src_removals:
continue
for clause in build_dependencies:
if not (clause <= removals):
# Something probably still satisfies this relation
continue
# whoops, we seemed to have removed all packages that could possibly satisfy
# this relation. Lets blame something for it
for dep_package in clause:
removal = dep_package
if dep_package in provides2removal:
removal = provides2removal[dep_package]
dep_problems[(removal, arch)].add((source, 'source'))
return dep_problems
def remove(session, reason, suites, removals,
whoami=None, partial=False, components=None, done_bugs=None, date=None,
carbon_copy=None, close_related_bugs=False):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册