From 2862e1227a592e3c79413019ac15eaa83eace30a Mon Sep 17 00:00:00 2001 From: Amador Pahim Date: Fri, 19 Feb 2016 15:28:00 -0200 Subject: [PATCH] avocado.utils asset fetcher Find for files in multiple locations, caching it when successfully fetched. Available as fetch_asset() method in avocado.Test(). Reference: https://trello.com/c/KTeMIx0u Signed-off-by: Amador Pahim --- avocado/core/test.py | 21 ++++ avocado/utils/asset.py | 180 +++++++++++++++++++++++++++++ docs/source/WritingTests.rst | 92 +++++++++++++++ etc/avocado/avocado.conf | 4 + selftests/unit/test_utils_asset.py | 61 ++++++++++ 5 files changed, 358 insertions(+) create mode 100644 avocado/utils/asset.py create mode 100644 selftests/unit/test_utils_asset.py diff --git a/avocado/core/test.py b/avocado/core/test.py index fa50d025..45a3d27a 100644 --- a/avocado/core/test.py +++ b/avocado/core/test.py @@ -30,12 +30,14 @@ from . import data_dir from . import exceptions from . import multiplexer from . import sysinfo +from ..utils import asset from ..utils import astring from ..utils import data_structures from ..utils import genio from ..utils import path as utils_path from ..utils import process from ..utils import stacktrace +from .settings import settings from .version import VERSION if sys.version_info[:2] == (2, 6): @@ -579,6 +581,25 @@ class Test(unittest.TestCase): """ raise exceptions.TestSkipError(message) + def fetch_asset(self, name, asset_hash=None, algorithm='sha1', + locations=None): + """ + Method o call the utils.asset in order to fetch and asset file + supporting hash check, caching and multiple locations. 
+ + :param name: the asset filename or URL + :param asset_hash: asset hash (optional) + :param algorithm: hash algorithm (optional, defaults to sha1) + :param locations: list of URLs from where the asset can be + fetched (optional) + :returns: asset file local path + """ + cache_dirs = settings.get_value('datadir.paths', 'cache_dirs', + key_type=list, default=[]) + cache_dirs.append(os.path.join(data_dir.get_data_dir(), 'cache')) + return asset.Asset(name, asset_hash, algorithm, locations, + cache_dirs).fetch() + class SimpleTest(Test): diff --git a/avocado/utils/asset.py b/avocado/utils/asset.py new file mode 100644 index 00000000..ec615ea5 --- /dev/null +++ b/avocado/utils/asset.py @@ -0,0 +1,180 @@ +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See LICENSE for more details. +# +# Copyright: Red Hat Inc. 2016 +# Author: Amador Pahim + +""" +Asset fetcher from multiple locationss +""" + +import logging +import os +import re +import urlparse + +from . import crypto +from . import path as utils_path +from .download import url_download + + +log = logging.getLogger('avocado.test') + + +class Asset(object): + """ + Try to fetch/verify an asset file from multiple locations. + """ + + def __init__(self, name, asset_hash, algorithm, locations, cache_dirs): + """ + Initialize the Asset() and fetches the asset file. The path for + the fetched file can be reached using the self.path attribute. + + :param name: the asset filename. 
url is also supported + :param asset_hash: asset hash + :param algorithm: hash algorithm + :param locations: list of locations fetch asset from + :params cache_dirs: list of cache directories + """ + self.name = name + self.asset_hash = asset_hash + self.algorithm = algorithm + self.locations = locations + self.cache_dirs = cache_dirs + self.nameobj = urlparse.urlparse(self.name) + self.basename = os.path.basename(self.nameobj.path) + + def fetch(self): + urls = [] + + # If name is actually an url, it has to be included in urls list + if self.nameobj.scheme: + urls.append(self.nameobj.geturl()) + + # First let's find for the file in all cache locations + for cache_dir in self.cache_dirs: + cache_dir = os.path.expanduser(cache_dir) + self.asset_file = os.path.join(cache_dir, self.basename) + if self._check_file(self.asset_file, self.asset_hash, self.algorithm): + return self.asset_file + + # If we get to this point, file is not in any cache directory + # and we have to download it from a location. A rw cache + # directory is then needed. The first rw cache directory will be + # used. + log.debug("Looking for a writable cache dir.") + for cache_dir in self.cache_dirs: + cache_dir = os.path.expanduser(cache_dir) + self.asset_file = os.path.join(cache_dir, self.basename) + if not utils_path.usable_rw_dir(cache_dir): + log.debug("Read-only cache dir '%s'. Skiping." % + cache_dir) + continue + log.debug("Using %s as cache dir." % cache_dir) + + # Adding the user defined locations to the urls list + if self.locations is not None: + for item in self.locations: + urls.append(item) + + for url in urls: + urlobj = urlparse.urlparse(url) + if urlobj.scheme == 'http' or urlobj.scheme == 'https': + log.debug('Downloading from %s.' 
% url) + try: + url_download(url, self.asset_file) + except Exception as e: + log.error(e) + continue + if self._check_file(self.asset_file, self.asset_hash, + self.algorithm): + return self.asset_file + + elif urlobj.scheme == 'ftp': + log.debug('Downloading from %s.' % url) + try: + url_download(url, self.asset_file) + except Exception as e: + log.error(e) + continue + if self._check_file(self.asset_file, self.asset_hash, + self.algorithm): + return self.asset_file + + elif urlobj.scheme == 'file': + if os.path.isdir(urlobj.path): + path = os.path.join(urlobj.path, self.name) + else: + path = urlobj.path + log.debug('Looking for file on %s.' % path) + if self._check_file(path): + os.symlink(path, self.asset_file) + log.debug('Symlink created %s -> %s.' % + (self.asset_file, path)) + else: + continue + if self._check_file(self.asset_file, self.asset_hash, + self.algorithm): + return self.asset_file + + raise EnvironmentError("Failed to fetch %s." % self.basename) + raise EnvironmentError("Can't find a writable cache dir.") + + @staticmethod + def _check_file(path, filehash=None, algorithm=None): + """ + Checks if file exists and verifies the hash, when the hash is + provided. We try first to find a hash file to verify the hash + against and only if the hash file is not present we compute the + hash. + """ + if not os.path.isfile(path): + log.debug('Asset %s not found.' % path) + return False + + if filehash is None: + return True + + basename = os.path.basename(path) + discovered_hash = None + # Try to find a hashfile for the asset file + hashfile = '%s.%s' % (path, algorithm) + if os.path.isfile(hashfile): + with open(hashfile, 'r') as f: + for line in f.readlines(): + # md5 is 32 chars big and sha512 is 128 chars big. + # others supported algorithms are between those. + pattern = '[a-f0-9]{32,128} %s' % basename + if re.match(pattern, line): + log.debug('Hashfile found for %s.' 
% path) + discovered_hash = line.split()[0] + break + + # If no hashfile, lets calculate the hash by ourselves + if discovered_hash is None: + log.debug('No hashfile found for %s. Computing hash.' % + path) + discovered_hash = crypto.hash_file(path, algorithm=algorithm) + + # Creating the hashfile for further usage. + log.debug('Creating hashfile %s.' % hashfile) + with open(hashfile, 'w') as f: + content = '%s %s\n' % (discovered_hash, basename) + f.write(content) + + if filehash == discovered_hash: + log.debug('Asset %s verified.' % path) + return True + else: + log.error('Asset %s corrupted (hash expected:%s, hash found:%s).' % + (path, filehash, discovered_hash)) + return False diff --git a/docs/source/WritingTests.rst b/docs/source/WritingTests.rst index 17c747aa..082b2a5c 100644 --- a/docs/source/WritingTests.rst +++ b/docs/source/WritingTests.rst @@ -345,6 +345,98 @@ In this example, the ``test`` method just gets into the base directory of the compiled suite and executes the ``./synctest`` command, with appropriate parameters, using :func:`avocado.utils.process.system`. +Fetching asset files +==================== +To run third party test suites as mentioned above, or for any other pourpose, +we offer an asset fetcher as a method of Avocado Test class. +The asset method looks for a list of directories in the ``cache_dirs`` key, +inside the ``[datadir.paths]`` section from the configuration files. Read-only +directories are also supported. When the asset file is not present in any of +the provided directories, we will try to download the file from the provided +locations, copying it to the first writable cache directory. Example:: + + cache_dirs = ['/usr/local/src/', '~/avocado/cache'] + +In the example above, ``/usr/local/src/`` is a read-only directory. In that +case, when we need to fetch the asset from the locations, it will be copied to +the ``~/avocado/cache`` directory. 
+ +If you don't provide a ``cache_dirs``, we will use the test temporary directory +as the cache to put the fetched files. That directory is expected to be dropped +by the end of the test. So, to take advantage of the cache feature, you have +to configure the ``cache_dirs`` on your system. + +* Use case 1: no ``cache_dirs`` key in config files, only the asset name + provided in the full url format:: + + ... + def setUp(self): + stress = 'http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz' + tarball = self.fetch_asset(stress) + archive.extract(tarball, self.srcdir) + ... + + In this case, ``fetch_asset()`` will download the file from the url provided, + copying it to the test temporary workdir. ``tarball`` variable will + contain, for example, ``/var/tmp/avocado_BZXo2B/stress.py_Stress.test/cache/stress-1.0.4.tar.gz``. + +* Use case 2: Read-only cache directory provided. ``cache_dirs = ['/mnt/files']``:: + + ... + def setUp(self): + stress = 'http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz' + tarball = self.fetch_asset(stress) + archive.extract(tarball, self.srcdir) + ... + + In this case, we try to find ``stress-1.0.4.tar.gz`` file in ``/mnt/files`` + directory. If it's not there, since ``/mnt/files`` is read-only, we will try + to download the asset file to the test temporary workdir. + +* Use case 3: Writable cache directory provided, along with a list of + locations. ``cache_dirs = ['~/avocado/cache']``:: + + ... + def setUp(self): + st_name = 'stress-1.0.4.tar.gz' + st_hash = 'e1533bc704928ba6e26a362452e6db8fd58b1f0b' + st_loc = ['http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz', + 'ftp://foo.bar/stress-1.0.4.tar.gz'] + tarball = self.fetch_asset(st_name, asset_hash=st_hash, + locations=st_loc) + archive.extract(tarball, self.srcdir) + ... + + In this case, we try to download ``stress-1.0.4.tar.gz`` from the provided + locations list (since it's not already in ``~/avocado/cache``). 
The hash was + also provided, so we will verify the hash. To do so, we first look for a + hashfile named ``stress-1.0.4.tar.gz.sha1`` in the same directory. If the + hashfile is not present we compute the hash and create the hashfile for + further usage. + + The resulting ``tarball`` variable content will be ``~/avocado/cache/stress-1.0.4.tar.gz``. + An exception will take place if we fail to download or to verify the file. + + +Detailing the ``fetch_asset()`` parameters: + +* ``name:`` The name used to name the fetched file. It can also contain a full + URL, which will be used as the first location to try (after searching in the + cache directories). +* ``asset_hash:`` (optional) The expected file hash. If missing, we skip the + check. If provided, before computing the hash, we look for a hashfile to + verify the asset. If the hashfile is not present, we compute the hash and + create the hashfile in the same cache directory for further usage. +* ``algorithm:`` (optional) Provided hash algorithm format. Defaults to sha1. +* ``locations:`` (optional) List of locations that will be used to try to fetch + the file from. The supported schemes are ``http://``, ``https://``, ``ftp://`` and + ``file://``. You must provide the full URL to the file, including + the file name. The first success will skip the next locations. Notice that + for ``file://`` we just create a symbolic link in the cache directory, + pointing to the file's original location. + +The expected ``return`` is the asset file path or an exception. 
+ Test Output Check and Output Record Mode ======================================== diff --git a/etc/avocado/avocado.conf b/etc/avocado/avocado.conf index 4a5805e4..f3c2f131 100644 --- a/etc/avocado/avocado.conf +++ b/etc/avocado/avocado.conf @@ -7,6 +7,10 @@ test_dir = /usr/share/avocado/tests data_dir = /usr/share/avocado/data # You may override the specific job results directory with logs_dir logs_dir = ~/avocado/job-results +# You can set a list of cache directories to be used by the avocado test +# fetch_asset() with 'cache_dirs'. read-only cache directories are also +# supported. +# cache_dirs = ['~/avocado/cache', '/mnt/cache'] [sysinfo.collect] # Whether to collect system information during avocado jobs diff --git a/selftests/unit/test_utils_asset.py b/selftests/unit/test_utils_asset.py new file mode 100644 index 00000000..6dea1d35 --- /dev/null +++ b/selftests/unit/test_utils_asset.py @@ -0,0 +1,61 @@ +import os +import shutil +import tempfile +import unittest + +from avocado.utils import asset + + +class TestAsset(unittest.TestCase): + + def setUp(self): + self.basedir = tempfile.mkdtemp(prefix='avocado_' + __name__) + self.assetdir = tempfile.mkdtemp(dir=self.basedir) + self.assetname = 'foo.tgz' + self.assethash = '3a033a8938c1af56eeb793669db83bcbd0c17ea5' + self.localpath = os.path.join(self.assetdir, self.assetname) + with open(self.localpath, 'w') as f: + f.write('Test!') + self.url = 'file://%s' % self.localpath + self.cache_dir = tempfile.mkdtemp(dir=self.basedir) + + def testFetch_urlname(self): + foo_tarball = asset.Asset(self.url, + asset_hash=self.assethash, + algorithm='sha1', + locations=None, + cache_dirs=[self.cache_dir]).fetch() + expected_tarball = os.path.join(self.cache_dir, self.assetname) + self.assertEqual(foo_tarball, expected_tarball) + hashfile = '.'.join([expected_tarball, 'sha1']) + self.assertTrue(os.path.isfile(hashfile)) + expected_content = '%s %s\n' % (self.assethash, self.assetname) + with open(hashfile, 'r') as f: + 
content = f.read() + self.assertEqual(content, expected_content) + + def testFetch_location(self): + foo_tarball = asset.Asset(self.assetname, + asset_hash=self.assethash, + algorithm='sha1', + locations=[self.url], + cache_dirs=[self.cache_dir]).fetch() + expected_tarball = os.path.join(self.cache_dir, self.assetname) + self.assertEqual(foo_tarball, expected_tarball) + hashfile = '.'.join([expected_tarball, 'sha1']) + self.assertTrue(os.path.isfile(hashfile)) + expected_content = '%s %s\n' % (self.assethash, self.assetname) + with open(hashfile, 'r') as f: + content = f.read() + self.assertEqual(content, expected_content) + + def testException(self): + a = asset.Asset(name='bar.tgz', asset_hash=None, algorithm=None, + locations=None, cache_dirs=[self.cache_dir]) + self.assertRaises(EnvironmentError, a.fetch) + + def tearDown(self): + shutil.rmtree(self.basedir) + +if __name__ == "__main__": + unittest.main() -- GitLab