提交 2862e122 编写于 作者: A Amador Pahim

avocado.utils asset fetcher

Searches for files in multiple locations, caching them when successfully fetched.
Available as fetch_asset() method in avocado.Test().

Reference: https://trello.com/c/KTeMIx0u
Signed-off-by: Amador Pahim <apahim@redhat.com>
上级 d9faeadc
......@@ -30,12 +30,14 @@ from . import data_dir
from . import exceptions
from . import multiplexer
from . import sysinfo
from ..utils import asset
from ..utils import astring
from ..utils import data_structures
from ..utils import genio
from ..utils import path as utils_path
from ..utils import process
from ..utils import stacktrace
from .settings import settings
from .version import VERSION
if sys.version_info[:2] == (2, 6):
......@@ -579,6 +581,25 @@ class Test(unittest.TestCase):
"""
raise exceptions.TestSkipError(message)
def fetch_asset(self, name, asset_hash=None, algorithm='sha1',
                locations=None):
    """
    Method to call utils.asset in order to fetch an asset file,
    supporting hash check, caching and multiple locations.

    :param name: the asset filename or URL
    :param asset_hash: asset hash (optional)
    :param algorithm: hash algorithm (optional, defaults to sha1)
    :param locations: list of URLs from where the asset can be
                      fetched (optional)
    :returns: asset file local path
    """
    # User-configured cache dirs come first; the data dir cache is
    # appended last as a guaranteed fallback location.
    cache_dirs = settings.get_value('datadir.paths', 'cache_dirs',
                                    key_type=list, default=[])
    cache_dirs.append(os.path.join(data_dir.get_data_dir(), 'cache'))
    return asset.Asset(name, asset_hash, algorithm, locations,
                       cache_dirs).fetch()
class SimpleTest(Test):
......
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# See LICENSE for more details.
#
# Copyright: Red Hat Inc. 2016
# Author: Amador Pahim <apahim@redhat.com>
"""
Asset fetcher from multiple locations
"""
import logging
import os
import re
import urlparse
from . import crypto
from . import path as utils_path
from .download import url_download
log = logging.getLogger('avocado.test')
class Asset(object):

    """
    Try to fetch/verify an asset file from multiple locations.
    """

    def __init__(self, name, asset_hash, algorithm, locations, cache_dirs):
        """
        Initialize the Asset() object. The actual search/download is
        performed by the fetch() method.

        :param name: the asset filename. url is also supported
        :param asset_hash: asset hash
        :param algorithm: hash algorithm
        :param locations: list of locations fetch asset from
        :param cache_dirs: list of cache directories
        """
        self.name = name
        self.asset_hash = asset_hash
        self.algorithm = algorithm
        self.locations = locations
        self.cache_dirs = cache_dirs
        self.nameobj = urlparse.urlparse(self.name)
        self.basename = os.path.basename(self.nameobj.path)

    def fetch(self):
        """
        Look for the asset in the cache dirs; when not found, download
        it from one of the known locations into the first writable
        cache directory.

        :returns: the asset local file path
        :raises EnvironmentError: when the asset cannot be fetched from
                                  any location, or when no writable
                                  cache directory is available
        """
        urls = []
        # If name is actually an url, it has to be included in urls list
        if self.nameobj.scheme:
            urls.append(self.nameobj.geturl())

        # First let's find for the file in all cache locations
        for cache_dir in self.cache_dirs:
            cache_dir = os.path.expanduser(cache_dir)
            self.asset_file = os.path.join(cache_dir, self.basename)
            if self._check_file(self.asset_file, self.asset_hash,
                                self.algorithm):
                return self.asset_file

        # If we get to this point, file is not in any cache directory
        # and we have to download it from a location. A rw cache
        # directory is then needed. The first rw cache directory will be
        # used.
        log.debug("Looking for a writable cache dir.")
        for cache_dir in self.cache_dirs:
            cache_dir = os.path.expanduser(cache_dir)
            self.asset_file = os.path.join(cache_dir, self.basename)
            if not utils_path.usable_rw_dir(cache_dir):
                log.debug("Read-only cache dir '%s'. Skipping.", cache_dir)
                continue
            log.debug("Using %s as cache dir.", cache_dir)

            # Adding the user defined locations to the urls list
            if self.locations is not None:
                urls.extend(self.locations)

            for url in urls:
                urlobj = urlparse.urlparse(url)
                if urlobj.scheme in ('http', 'https', 'ftp'):
                    # Remote asset: download into the cache dir. The
                    # handling is identical for http(s) and ftp, so the
                    # schemes share one branch.
                    log.debug('Downloading from %s.', url)
                    try:
                        url_download(url, self.asset_file)
                    except Exception as details:
                        log.error(details)
                        continue
                elif urlobj.scheme == 'file':
                    # Local asset: just symlink it into the cache dir.
                    if os.path.isdir(urlobj.path):
                        path = os.path.join(urlobj.path, self.name)
                    else:
                        path = urlobj.path
                    log.debug('Looking for file on %s.', path)
                    if self._check_file(path):
                        os.symlink(path, self.asset_file)
                        log.debug('Symlink created %s -> %s.',
                                  self.asset_file, path)
                    else:
                        continue
                else:
                    # Unsupported scheme: try the next location.
                    continue
                # Verify the freshly cached file (hash check when a
                # hash was provided).
                if self._check_file(self.asset_file, self.asset_hash,
                                    self.algorithm):
                    return self.asset_file
            raise EnvironmentError("Failed to fetch %s." % self.basename)
        raise EnvironmentError("Can't find a writable cache dir.")

    @staticmethod
    def _check_file(path, filehash=None, algorithm=None):
        """
        Checks if file exists and verifies the hash, when the hash is
        provided. We try first to find a hash file to verify the hash
        against and only if the hash file is not present we compute the
        hash.

        :param path: local path of the file to check
        :param filehash: expected hash; skip the hash check when None
        :param algorithm: hash algorithm matching `filehash`
        :returns: True when the file exists (and matches the hash when
                  one was given), False otherwise
        """
        if not os.path.isfile(path):
            log.debug('Asset %s not found.', path)
            return False
        if filehash is None:
            return True

        basename = os.path.basename(path)
        discovered_hash = None
        # Try to find a hashfile for the asset file
        hashfile = '%s.%s' % (path, algorithm)
        if os.path.isfile(hashfile):
            with open(hashfile, 'r') as f:
                for line in f.readlines():
                    # md5 is 32 chars big and sha512 is 128 chars big.
                    # others supported algorithms are between those.
                    pattern = '[a-f0-9]{32,128} %s' % basename
                    if re.match(pattern, line):
                        log.debug('Hashfile found for %s.', path)
                        discovered_hash = line.split()[0]
                        break

        # If no hashfile, lets calculate the hash by ourselves
        if discovered_hash is None:
            log.debug('No hashfile found for %s. Computing hash.', path)
            discovered_hash = crypto.hash_file(path, algorithm=algorithm)
            # Creating the hashfile for further usage.
            log.debug('Creating hashfile %s.', hashfile)
            with open(hashfile, 'w') as f:
                f.write('%s %s\n' % (discovered_hash, basename))

        if filehash == discovered_hash:
            log.debug('Asset %s verified.', path)
            return True
        log.error('Asset %s corrupted (hash expected:%s, hash found:%s).',
                  path, filehash, discovered_hash)
        return False
......@@ -345,6 +345,98 @@ In this example, the ``test`` method just gets into the base directory of
the compiled suite and executes the ``./synctest`` command, with appropriate
parameters, using :func:`avocado.utils.process.system`.
Fetching asset files
====================
To run third party test suites as mentioned above, or for any other purpose,
we offer an asset fetcher as a method of Avocado Test class.
The asset method looks for a list of directories in the ``cache_dirs`` key,
inside the ``[datadir.paths]`` section from the configuration files. Read-only
directories are also supported. When the asset file is not present in any of
the provided directories, we will try to download the file from the provided
locations, copying it to the first writable cache directory. Example::
cache_dirs = ['/usr/local/src/', '~/avocado/cache']
In the example above, ``/usr/local/src/`` is a read-only directory. In that
case, when we need to fetch the asset from the locations, it will be copied to
the ``~/avocado/cache`` directory.
If you don't provide a ``cache_dirs``, we will use the test temporary directory
as the cache to put the fetched files. That directory is expected to be dropped
by the end of the test. So, to take advantage of the cache feature, you have
to configure the ``cache_dirs`` on your system.
* Use case 1: no ``cache_dirs`` key in config files, only the asset name
provided in the full url format::
...
def setUp(self):
stress = 'http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz'
tarball = self.fetch_asset(stress)
archive.extract(tarball, self.srcdir)
...
In this case, ``fetch_asset()`` will download the file from the url provided,
copying it to the test temporary workdir. The ``tarball`` variable will
contain, for example, ``/var/tmp/avocado_BZXo2B/stress.py_Stress.test/cache/stress-1.0.4.tar.gz``.
* Use case 2: Read-only cache directory provided. ``cache_dirs = ['/mnt/files']``::
...
def setUp(self):
stress = 'http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz'
tarball = self.fetch_asset(stress)
archive.extract(tarball, self.srcdir)
...
In this case, we try to find ``stress-1.0.4.tar.gz`` file in ``/mnt/files``
directory. If it's not there, since ``/mnt/files`` is read-only, we will try
to download the asset file to the test temporary workdir.
* Use case 3: Writable cache directory provided, along with a list of
locations. ``cache_dirs = ['~/avocado/cache']``::
...
def setUp(self):
st_name = 'stress-1.0.4.tar.gz'
st_hash = 'e1533bc704928ba6e26a362452e6db8fd58b1f0b'
st_loc = ['http://people.seas.harvard.edu/~apw/stress/stress-1.0.4.tar.gz',
'ftp://foo.bar/stress-1.0.4.tar.gz']
tarball = self.fetch_asset(st_name, asset_hash=st_hash,
locations=st_loc)
archive.extract(tarball, self.srcdir)
...
In this case, we try to download ``stress-1.0.4.tar.gz`` from the provided
locations list (since it's not already in ``~/avocado/cache``). The hash was
also provided, so we will verify the hash. To do so, we first look for a
hashfile named ``stress-1.0.4.tar.gz.sha1`` in the same directory. If the
hashfile is not present we compute the hash and create the hashfile for
further usage.
The resulting ``tarball`` variable content will be ``~/avocado/cache/stress-1.0.4.tar.gz``.
An exception will take place if we fail to download or to verify the file.
Detailing the ``fetch_asset()`` attributes:
* ``name:`` The name used to name the fetched file. It can also contain a full
  URL, that will be used as the first location to try (after searching into the
  cache directories).
* ``asset_hash:`` (optional) The expected file hash. If missing, we skip the
check. If provided, before computing the hash, we look for a hashfile to
verify the asset. If the hashfile is not present, we compute the hash and
create the hashfile in the same cache directory for further usage.
* ``algorithm:`` (optional) Provided hash algorithm format. Defaults to sha1.
* ``locations:`` (optional) List of locations that will be used to try to fetch
the file from. The supported schemes are ``http://``, ``ftp://`` and
``file://``. You're required to inform the full url to the file, including
the file name. The first success will skip the next locations. Notice that
for ``file://`` we just create a symbolic link in the cache directory,
pointing to the file original location.
The expected ``return`` is the asset file path or an exception.
Test Output Check and Output Record Mode
========================================
......
......@@ -7,6 +7,10 @@ test_dir = /usr/share/avocado/tests
data_dir = /usr/share/avocado/data
# You may override the specific job results directory with logs_dir
logs_dir = ~/avocado/job-results
# You can set a list of cache directories to be used by the avocado test
# fetch_asset() with 'cache_dirs'. read-only cache directories are also
# supported.
# cache_dirs = ['~/avocado/cache', '/mnt/cache']
[sysinfo.collect]
# Whether to collect system information during avocado jobs
......
import os
import shutil
import tempfile
import unittest
from avocado.utils import asset
class TestAsset(unittest.TestCase):

    def setUp(self):
        # Disposable tree holding the asset source dir and a cache dir;
        # removed in tearDown().
        self.basedir = tempfile.mkdtemp(prefix='avocado_' + __name__)
        self.assetdir = tempfile.mkdtemp(dir=self.basedir)
        self.assetname = 'foo.tgz'
        self.assethash = '3a033a8938c1af56eeb793669db83bcbd0c17ea5'
        self.localpath = os.path.join(self.assetdir, self.assetname)
        with open(self.localpath, 'w') as f:
            f.write('Test!')
        self.url = 'file://%s' % self.localpath
        self.cache_dir = tempfile.mkdtemp(dir=self.basedir)

    def _verify_fetched_asset(self, fetched_path):
        # Common assertions for both fetch tests: the asset must land
        # in the cache dir and a matching hashfile must be created.
        expected_tarball = os.path.join(self.cache_dir, self.assetname)
        self.assertEqual(fetched_path, expected_tarball)
        hashfile = '.'.join([expected_tarball, 'sha1'])
        self.assertTrue(os.path.isfile(hashfile))
        expected_content = '%s %s\n' % (self.assethash, self.assetname)
        with open(hashfile, 'r') as f:
            content = f.read()
        self.assertEqual(content, expected_content)

    def testFetch_urlname(self):
        # Asset name given as a full file:// URL, no extra locations.
        foo_tarball = asset.Asset(self.url,
                                  asset_hash=self.assethash,
                                  algorithm='sha1',
                                  locations=None,
                                  cache_dirs=[self.cache_dir]).fetch()
        self._verify_fetched_asset(foo_tarball)

    def testFetch_location(self):
        # Plain asset name; the source URL is provided via locations.
        foo_tarball = asset.Asset(self.assetname,
                                  asset_hash=self.assethash,
                                  algorithm='sha1',
                                  locations=[self.url],
                                  cache_dirs=[self.cache_dir]).fetch()
        self._verify_fetched_asset(foo_tarball)

    def testException(self):
        # Unknown asset with no locations must raise EnvironmentError.
        a = asset.Asset(name='bar.tgz', asset_hash=None, algorithm=None,
                        locations=None, cache_dirs=[self.cache_dir])
        self.assertRaises(EnvironmentError, a.fetch)

    def tearDown(self):
        shutil.rmtree(self.basedir)
# Allow running this test module directly.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册