#!/usr/bin/python """ DB access class @contact: Debian FTPMaster @copyright: 2000, 2001, 2002, 2003, 2004, 2006 James Troup @copyright: 2008-2009 Mark Hymers @copyright: 2009 Joerg Jaspert @copyright: 2009 Mike O'Connor @license: GNU General Public License version 2 or later """ # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ################################################################################ # < mhy> I need a funny comment # < sgran> two peanuts were walking down a dark street # < sgran> one was a-salted # * mhy looks up the definition of "funny" ################################################################################ import os import psycopg2 import traceback from singleton import Singleton from config import Config ################################################################################ class Cache(object): def __init__(self, hashfunc=None): if hashfunc: self.hashfunc = hashfunc else: self.hashfunc = lambda x: x['value'] self.data = {} def SetValue(self, keys, value): self.data[self.hashfunc(keys)] = value def GetValue(self, keys): return self.data.get(self.hashfunc(keys)) ################################################################################ class DBConn(Singleton): """ database module init. """ def __init__(self, *args, **kwargs): super(DBConn, self).__init__(*args, **kwargs) def _startup(self, *args, **kwargs): self.__createconn() self.__init_caches() ## Connection functions def __createconn(self): cnf = Config() connstr = "dbname=%s" % cnf["DB::Name"] if cnf["DB::Host"]: connstr += " host=%s" % cnf["DB::Host"] if cnf["DB::Port"] and cnf["DB::Port"] != "-1": connstr += " port=%s" % cnf["DB::Port"] self.db_con = psycopg2.connect(connstr) def reconnect(self): try: self.db_con.close() except psycopg2.InterfaceError: pass self.db_con = None self.__createconn() ## Cache functions def __init_caches(self): self.caches = {'suite': Cache(), 'section': Cache(), 'priority': Cache(), 'override_type': Cache(), 'architecture': Cache(), 'archive': Cache(), 'component': Cache(), 'content_path_names': Cache(), 'content_file_names': Cache(), 'location': Cache(lambda x: '%s_%s_%s' % (x['location'], x['component'], x['location'])), 'maintainer': {}, # TODO 'keyring': {}, # TODO 'source': Cache(lambda x: '%s_%s_' % (x['source'], x['version'])), 'files': Cache(lambda x: '%s_%s_' % (x['filename'], x['location'])), 'maintainer': {}, # TODO 'fingerprint': {}, # TODO 'queue': {}, # TODO 'uid': {}, # TODO 'suite_version': Cache(lambda x: '%s_%s' % (x['source'], x['suite'])), } self.prepared_statements = {} def prepare(self,name,statement): if not self.prepared_statements.has_key(name): c = self.cursor() c.execute(statement) self.prepared_statements[name] = statement def clear_caches(self): self.__init_caches() ## Functions to pass through to the database connector def cursor(self): return self.db_con.cursor() def commit(self): return self.db_con.commit() ## Get functions def __get_single_id(self, query, values, cachename=None): # This is a bit of a hack but it's an internal function only if cachename is not None: res = self.caches[cachename].GetValue(values) if res: return res c = self.db_con.cursor() c.execute(query, values) if c.rowcount != 1: return None res = c.fetchone()[0] if cachename is not None: self.caches[cachename].SetValue(values, res) return res def __get_id(self, retfield, table, qfield, value): query = "SELECT %s FROM %s WHERE %s = %%(value)s" % (retfield, table, qfield) return self.__get_single_id(query, {'value': value}, cachename=table) def get_suite_id(self, suite): """ Returns database id for given C{suite}. Results are kept in a cache during runtime to minimize database queries. @type suite: string @param suite: The name of the suite @rtype: int @return: the database id for the given suite """ return int(self.__get_id('id', 'suite', 'suite_name', suite)) def get_section_id(self, section): """ Returns database id for given C{section}. Results are kept in a cache during runtime to minimize database queries. @type section: string @param section: The name of the section @rtype: int @return: the database id for the given section """ return self.__get_id('id', 'section', 'section', section) def get_priority_id(self, priority): """ Returns database id for given C{priority}. Results are kept in a cache during runtime to minimize database queries. @type priority: string @param priority: The name of the priority @rtype: int @return: the database id for the given priority """ return self.__get_id('id', 'priority', 'priority', priority) def get_override_type_id(self, override_type): """ Returns database id for given override C{type}. Results are kept in a cache during runtime to minimize database queries. @type type: string @param type: The name of the override type @rtype: int @return: the database id for the given override type """ return self.__get_id('id', 'override_type', 'type', override_type) def get_architecture_id(self, architecture): """ Returns database id for given C{architecture}. Results are kept in a cache during runtime to minimize database queries. @type architecture: string @param architecture: The name of the override type @rtype: int @return: the database id for the given architecture """ return self.__get_id('id', 'architecture', 'arch_string', architecture) def get_archive_id(self, archive): """ returns database id for given c{archive}. results are kept in a cache during runtime to minimize database queries. @type archive: string @param archive: the name of the override type @rtype: int @return: the database id for the given archive """ return self.__get_id('id', 'archive', 'lower(name)', archive) def get_component_id(self, component): """ Returns database id for given C{component}. Results are kept in a cache during runtime to minimize database queries. @type component: string @param component: The name of the override type @rtype: int @return: the database id for the given component """ return self.__get_id('id', 'component', 'lower(name)', component) def get_location_id(self, location, component, archive): """ Returns database id for the location behind the given combination of - B{location} - the path of the location, eg. I{/srv/ftp.debian.org/ftp/pool/} - B{component} - the id of the component as returned by L{get_component_id} - B{archive} - the id of the archive as returned by L{get_archive_id} Results are kept in a cache during runtime to minimize database queries. @type location: string @param location: the path of the location @type component: int @param component: the id of the component @type archive: int @param archive: the id of the archive @rtype: int @return: the database id for the location """ archive_id = self.get_archive_id(archive) if not archive_id: return None res = None if component: component_id = self.get_component_id(component) if component_id: res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND component=%(component)s AND archive=%(archive)s", {'location': location, 'archive': int(archive_id), 'component': component_id}, cachename='location') else: res = self.__get_single_id("SELECT id FROM location WHERE path=%(location)s AND archive=%(archive)d", {'location': location, 'archive': archive_id, 'component': ''}, cachename='location') return res def get_source_id(self, source, version): """ Returns database id for the combination of C{source} and C{version} - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} - B{version} Results are kept in a cache during runtime to minimize database queries. @type source: string @param source: source package name @type version: string @param version: the source version @rtype: int @return: the database id for the source """ return self.__get_single_id("SELECT id FROM source s WHERE s.source=%(source)s AND s.version=%(version)s", {'source': source, 'version': version}, cachename='source') def get_suite_version(self, source, suite): """ Returns database id for a combination of C{source} and C{suite}. - B{source} - source package name, eg. I{mailfilter}, I{bbdb}, I{glibc} - B{suite} - a suite name, eg. I{unstable} Results are kept in a cache during runtime to minimize database queries. @type source: string @param source: source package name @type suite: string @param suite: the suite name @rtype: string @return: the version for I{source} in I{suite} """ return self.__get_single_id(""" SELECT s.version FROM source s, suite su, src_associations sa WHERE sa.source=s.id AND sa.suite=su.id AND su.suite_name=%(suite)s AND s.source=%(source)""", {'suite': suite, 'source': source}, cachename='suite_version') def get_files_id (self, filename, size, md5sum, location_id): """ Returns -1, -2 or the file_id for filename, if its C{size} and C{md5sum} match an existing copy. The database is queried using the C{filename} and C{location_id}. If a file does exist at that location, the existing size and md5sum are checked against the provided parameters. A size or checksum mismatch returns -2. If more than one entry is found within the database, a -1 is returned, no result returns None, otherwise the file id. Results are kept in a cache during runtime to minimize database queries. @type filename: string @param filename: the filename of the file to check against the DB @type size: int @param size: the size of the file to check against the DB @type md5sum: string @param md5sum: the md5sum of the file to check against the DB @type location_id: int @param location_id: the id of the location as returned by L{get_location_id} @rtype: int / None @return: Various return values are possible: - -2: size/checksum error - -1: more than one file found in database - None: no file found in database - int: file id """ values = {'filename' : filename, 'location' : location_id} res = self.caches['files'].GetValue( values ) if not res: query = """SELECT id, size, md5sum FROM files WHERE filename = %(filename)s AND location = %(location)s""" cursor = self.db_con.cursor() cursor.execute( query, values ) if cursor.rowcount == 0: res = None elif cursor.rowcount != 1: res = -1 else: row = cursor.fetchone() if row[1] != int(size) or row[2] != md5sum: res = -2 else: self.caches['files'].SetValue(values, row[0]) res = row[0] return res def get_or_set_contents_file_id(self, filename): """ Returns database id for given filename. Results are kept in a cache during runtime to minimize database queries. If no matching file is found, a row is inserted. @type filename: string @param filename: The filename @rtype: int @return: the database id for the given component """ try: values={'value': filename} query = "SELECT id FROM content_file_names WHERE file = %(value)s" id = self.__get_single_id(query, values, cachename='content_file_names') if not id: c = self.db_con.cursor() c.execute( "INSERT INTO content_file_names VALUES (DEFAULT, %(value)s) RETURNING id", values ) id = c.fetchone()[0] self.caches['content_file_names'].SetValue(values, id) return id except: traceback.print_exc() raise def get_or_set_contents_path_id(self, path): """ Returns database id for given path. Results are kept in a cache during runtime to minimize database queries. If no matching file is found, a row is inserted. @type path: string @param path: The filename @rtype: int @return: the database id for the given component """ try: values={'value': path} query = "SELECT id FROM content_file_paths WHERE path = %(value)s" id = self.__get_single_id(query, values, cachename='content_path_names') if not id: c = self.db_con.cursor() c.execute( "INSERT INTO content_file_paths VALUES (DEFAULT, %(value)s) RETURNING id", values ) id = c.fetchone()[0] self.caches['content_path_names'].SetValue(values, id) return id except: traceback.print_exc() raise def get_suite_architectures(self, suite): """ Returns list of architectures for C{suite}. @type suite: string, int @param suite: the suite name or the suite_id @rtype: list @return: the list of architectures for I{suite} """ suite_id = None if type(suite) == str: suite_id = self.get_suite_id(suite) elif type(suite) == int: suite_id = suite else: return None c = self.db_con.cursor() c.execute( """SELECT a.arch_string FROM suite_architectures sa JOIN architecture a ON (a.id = sa.architecture) WHERE suite='%s'""" % suite_id ) return map(lambda x: x[0], c.fetchall()) def insert_content_paths(self, bin_id, fullpaths): """ Make sure given path is associated with given binary id @type bin_id: int @param bin_id: the id of the binary @type fullpath: string @param fullpath: the path of the file being associated with the binary @return True upon success """ c = self.db_con.cursor() c.execute("BEGIN WORK") try: for fullpath in fullpaths: (path, file) = os.path.split(fullpath) # Get the necessary IDs ... file_id = self.get_or_set_contents_file_id(file) path_id = self.get_or_set_contents_path_id(path) c.execute("""INSERT INTO content_associations (binary_pkg, filepath, filename) VALUES ( '%d', '%d', '%d')""" % (bin_id, path_id, file_id) ) c.execute("COMMIT") return True except: traceback.print_exc() c.execute("ROLLBACK") return False def insert_pending_content_paths(self, package, fullpaths): """ Make sure given paths are temporarily associated with given package @type package: dict @param package: the package to associate with should have been read in from the binary control file @type fullpaths: list @param fullpaths: the list of paths of the file being associated with the binary @return True upon success """ c = self.db_con.cursor() c.execute("BEGIN WORK") try: arch_id = self.get_architecture_id(package['Architecture']) # Remove any already existing recorded files for this package c.execute("""DELETE FROM pending_content_associations WHERE package=%(Package)s AND version=%(Version)s AND architecture=%(ArchID)s""", {'Package': package['Package'], 'Version': package['Version'], 'ArchID': arch_id}) for fullpath in fullpaths: (path, file) = os.path.split(fullpath) if path.startswith( "./" ): path = path[2:] # Get the necessary IDs ... file_id = self.get_or_set_contents_file_id(file) path_id = self.get_or_set_contents_path_id(path) c.execute("""INSERT INTO pending_content_associations (package, version, architecture, filepath, filename) VALUES (%%(Package)s, %%(Version)s, '%d', '%d', '%d')""" % (arch_id, path_id, file_id), package ) c.execute("COMMIT") return True except: traceback.print_exc() c.execute("ROLLBACK") return False