# checksum.py -- core Portage functionality
# Copyright 1998-2012 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2

import portage
from portage.const import PRELINK_BINARY, HASHING_BLOCKSIZE
from portage.localization import _
from portage import os
from portage import _encodings
from portage import _unicode_encode
import errno
import stat
import tempfile

# Registry of all available hash functions, keyed by hash type name
# (e.g. "MD5"), and a parallel registry naming the implementation that
# provides each of them.
hashfunc_map = {}
hashorigin_map = {}


def _open_file(filename):
    """
    Open a file for binary reading, translating common IOErrors into
    portage exceptions.

    @param filename: Path of the file to open
    @type filename: String
    @return: The opened file object (mode 'rb')
    @raise portage.exception.OperationNotPermitted: on EPERM
    @raise portage.exception.PermissionDenied: on EACCES
    @raise portage.exception.FileNotFound: on ENOENT
    @raise IOError: for any other errno (re-raised unchanged)
    """
    try:
        return open(_unicode_encode(filename,
            encoding=_encodings['fs'], errors='strict'), 'rb')
    except IOError as e:
        func_call = "open('%s')" % filename
        if e.errno == errno.EPERM:
            raise portage.exception.OperationNotPermitted(func_call)
        elif e.errno == errno.EACCES:
            raise portage.exception.PermissionDenied(func_call)
        elif e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        else:
            raise


class _generate_hash_function(object):
    """
    Callable wrapper that checksums a file with a given hash constructor.

    Creating an instance registers it in hashfunc_map under hashtype and
    records its origin in hashorigin_map, replacing any earlier entry.
    """

    __slots__ = ("_hashobject",)

    def __init__(self, hashtype, hashobject, origin="unknown"):
        """
        @param hashtype: Name to register under (e.g. "SHA256")
        @type hashtype: String
        @param hashobject: Zero-argument callable returning a new hash
            object that provides update() and hexdigest()
        @param origin: Name of the implementation providing hashobject
        @type origin: String
        """
        self._hashobject = hashobject
        hashfunc_map[hashtype] = self
        hashorigin_map[hashtype] = origin

    def __call__(self, filename):
        """
        Run a checksum against a file.

        @param filename: File to run the checksum against
        @type filename: String
        @return: The hash and size of the data
        """
        f = _open_file(filename)
        # Close the file even if reading or hashing fails part-way.
        try:
            blocksize = HASHING_BLOCKSIZE
            size = 0
            checksum = self._hashobject()
            data = f.read(blocksize)
            while data:
                checksum.update(data)
                size = size + len(data)
                data = f.read(blocksize)
        finally:
            f.close()

        return (checksum.hexdigest(), size)


# Define hash functions, try to use the best module available. Later
# definitions override earlier ones.

# Use the internal modules as last fallback
try:
    from hashlib import md5 as _new_md5
except ImportError:
    from md5 import new as _new_md5

md5hash = _generate_hash_function("MD5", _new_md5, origin="internal")

try:
    from hashlib import sha1 as _new_sha1
except ImportError:
    from sha import new as _new_sha1

sha1hash = _generate_hash_function("SHA1", _new_sha1, origin="internal")

# Try to use mhash if available
# mhash causes GIL presently, so it gets less priority than hashlib and
# pycrypto. However, it might be the only accelerated implementation of
# WHIRLPOOL available.
try:
    import mhash
    import functools
    md5hash = _generate_hash_function("MD5",
        functools.partial(mhash.MHASH, mhash.MHASH_MD5), origin="mhash")
    sha1hash = _generate_hash_function("SHA1",
        functools.partial(mhash.MHASH, mhash.MHASH_SHA1), origin="mhash")
    sha256hash = _generate_hash_function("SHA256",
        functools.partial(mhash.MHASH, mhash.MHASH_SHA256), origin="mhash")
    sha512hash = _generate_hash_function("SHA512",
        functools.partial(mhash.MHASH, mhash.MHASH_SHA512), origin="mhash")
    for local_name, hash_name in (("rmd160", "ripemd160"),
        ("whirlpool", "whirlpool")):
        # Probe for the very attribute that is fetched below, so that a
        # missing constant skips the hash instead of raising an
        # AttributeError that the ImportError handler would not catch.
        mhash_const = 'MHASH_%s' % hash_name.upper()
        if hasattr(mhash, mhash_const):
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(),
                functools.partial(mhash.MHASH, getattr(mhash, mhash_const)),
                origin='mhash')
except ImportError:
    pass

# Use pycrypto when available, prefer it over the internal fallbacks
# Check for 'new' attributes, since they can be missing if the module
# is broken somehow.
try:
    from Crypto.Hash import SHA256, RIPEMD
    sha256hash = getattr(SHA256, 'new', None)
    if sha256hash is not None:
        sha256hash = _generate_hash_function("SHA256",
            sha256hash, origin="pycrypto")
    rmd160hash = getattr(RIPEMD, 'new', None)
    if rmd160hash is not None:
        rmd160hash = _generate_hash_function("RMD160",
            rmd160hash, origin="pycrypto")
except ImportError:
    pass

# Use hashlib from python-2.5 if available and prefer it over pycrypto
# and internal fallbacks. Need special handling for RMD160/WHIRLPOOL as
# they may not always be provided by hashlib.
try:
    import hashlib
    import functools

    md5hash = _generate_hash_function("MD5", hashlib.md5, origin="hashlib")
    sha1hash = _generate_hash_function("SHA1", hashlib.sha1, origin="hashlib")
    sha256hash = _generate_hash_function("SHA256", hashlib.sha256,
        origin="hashlib")
    sha512hash = _generate_hash_function("SHA512", hashlib.sha512,
        origin="hashlib")
    for local_name, hash_name in (("rmd160", "ripemd160"),
        ("whirlpool", "whirlpool")):
        try:
            # hashlib.new() raises ValueError for unsupported algorithms.
            hashlib.new(hash_name)
        except ValueError:
            pass
        else:
            globals()['%shash' % local_name] = \
                _generate_hash_function(local_name.upper(),
                functools.partial(hashlib.new, hash_name),
                origin='hashlib')
except ImportError:
    pass

if "WHIRLPOOL" not in hashfunc_map:
    # Bundled WHIRLPOOL implementation
    from portage.util.whirlpool import new as _new_whirlpool
    whirlpoolhash = _generate_hash_function("WHIRLPOOL", _new_whirlpool,
        origin="bundled")

# Use python-fchksum if available, prefer it over all other MD5
# implementations
try:
    from fchksum import fmd5t as md5hash
    hashfunc_map["MD5"] = md5hash
    hashorigin_map["MD5"] = "python-fchksum"
except ImportError:
    pass


# There is only one implementation for size
def getsize(filename):
    """
    Pseudo hash function that reports the size of a file.

    @param filename: File to stat
    @type filename: String
    @return: The tuple (size, size), mirroring the (hash, size) shape
        returned by the real hash functions
    """
    size = os.stat(filename).st_size
    return (size, size)

hashfunc_map["size"] = getsize

# end actual hash functions

# Detect whether a working prelink binary is present: it must exist and
# "prelink --version" must exit with status 0.
prelink_capable = False
if os.path.exists(PRELINK_BINARY):
    results = portage.subprocess_getstatusoutput(
        "%s --version > /dev/null 2>&1" % (PRELINK_BINARY,))
    # The exit code lives in the high byte of the wait status.
    if (results[0] >> 8) == 0:
        prelink_capable = True
    del results


def is_prelinkable_elf(filename):
    """
    Check whether a file is an ELF executable or shared object, judged
    by the first 17 bytes of its header.

    @param filename: File to inspect
    @type filename: String
    @rtype: Boolean
    @return: True if the file starts with the ELF magic and byte 16 is
        2 (ET_EXEC) or 3 (ET_DYN)
    """
    f = _open_file(filename)
    try:
        magic = f.read(17)
    finally:
        f.close()
    # Slice instead of index: indexing a bytes object yields an int on
    # Python 3, which would never compare equal to a bytes constant.
    return (len(magic) == 17 and magic.startswith(b'\x7fELF') and
        magic[16:17] in (b'\x02', b'\x03'))  # 2=ET_EXEC, 3=ET_DYN


def perform_md5(x, calc_prelink=0):
    """
    Return the MD5 hex digest of file x (see perform_checksum).
    """
    return perform_checksum(x, "MD5", calc_prelink)[0]


def _perform_md5_merge(x, **kwargs):
    """
    Like perform_md5, but first encodes x with the 'merge' encoding.
    """
    return perform_md5(_unicode_encode(x,
        encoding=_encodings['merge'], errors='strict'), **kwargs)


def perform_all(x, calc_prelink=0):
    """
    Run every registered hash function (including "size") against x.

    @rtype: Dictionary
    @return: Mapping of hash type name to the computed value
    """
    mydict = {}
    for k in hashfunc_map:
        mydict[k] = perform_checksum(x, k, calc_prelink)[0]
    return mydict


def get_valid_checksum_keys():
    """
    Return a list of the registered hash type names.
    """
    return list(hashfunc_map)


def get_hash_origin(hashtype):
    """
    Return the name of the implementation that provides hashtype.

    @raise KeyError: if hashtype is not registered
    """
    if hashtype not in hashfunc_map:
        raise KeyError(hashtype)
    return hashorigin_map.get(hashtype, "unknown")


def verify_all(filename, mydict, calc_prelink=0, strict=0):
    """
    Verify all checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param mydict: Recorded values keyed by hash type name; a "size"
        entry is required
    @type mydict: Dictionary
    @param calc_prelink: Whether or not to reverse prelink before running
        the checksum
    @type calc_prelink: Integer
    @param strict: Enable/Disable strict checking (which stops exactly at
        a checksum failure and throws an exception)
    @type strict: Integer
    @rtype: Tuple
    @return: Result of the checks and possible message:
        1) If size fails, False, and a tuple containing a message, the
           actual size, and the recorded size
        2) If there is an os error, False, and a tuple containing the
           system error followed by 2 nulls
        3) If no recorded hash type is supported, False, and a tuple
           containing a message, the recorded hash types, and the
           supported hash types
        4) If a checksum fails, False and a tuple containing a message,
           the actual hash, and the recorded hash
        5) If all checks succeed, return True and a fake reason
    @raise portage.exception.FileNotFound: if the file does not exist
    @raise portage.exception.DigestException: if strict is enabled and
        a checksum does not match
    """
    # Dict relates to single file only.
    # returns: (passed,reason)
    file_is_ok = True
    reason = "Reason unknown"
    try:
        mysize = os.stat(filename)[stat.ST_SIZE]
        if mydict["size"] != mysize:
            return False, (_("Filesize does not match recorded size"),
                mysize, mydict["size"])
    except OSError as e:
        if e.errno == errno.ENOENT:
            raise portage.exception.FileNotFound(filename)
        return False, (str(e), None, None)

    # Refuse to pass a file when none of the recorded hash types can
    # actually be computed here; a size match alone is not sufficient.
    verifiable_hash_types = set(mydict).intersection(hashfunc_map)
    verifiable_hash_types.discard("size")
    if not verifiable_hash_types:
        expected = set(hashfunc_map)
        expected.discard("size")
        expected = " ".join(sorted(expected))
        got = set(mydict)
        got.discard("size")
        got = " ".join(sorted(got))
        return False, (_("Insufficient data for checksum verification"),
            got, expected)

    for x in sorted(mydict):
        if x == "size":
            continue
        elif x in hashfunc_map:
            myhash = perform_checksum(filename, x,
                calc_prelink=calc_prelink)[0]
            if mydict[x] != myhash:
                if strict:
                    raise portage.exception.DigestException(
                        ("Failed to verify '%(file)s' on "
                        "checksum type '%(type)s'") %
                        {"file": filename, "type": x})
                else:
                    file_is_ok = False
                    reason = (("Failed on %s verification" % x),
                        myhash, mydict[x])
                    break
    return file_is_ok, reason


def perform_checksum(filename, hashname="MD5", calc_prelink=0):
    """
    Run a specific checksum against a file. The filename can
    be either unicode or an encoded byte string. The filename is
    encoded with the filesystem encoding using errors='strict'.

    @param filename: File to run the checksum against
    @type filename: String
    @param hashname: The type of hash function to run
    @type hashname: String
    @param calc_prelink: Whether or not to reverse prelink before running
        the checksum
    @type calc_prelink: Integer
    @rtype: Tuple
    @return: The hash and size of the data
    @raise portage.exception.DigestException: if hashname is not
        registered
    @raise portage.exception.FileNotFound: on ENOENT or ESTALE
    @raise portage.exception.PermissionDenied: on a permission error
    """
    global prelink_capable
    # Make sure filename is encoded with the correct encoding before
    # it is passed to spawn (for prelink) and/or the hash function.
    filename = _unicode_encode(filename,
        encoding=_encodings['fs'], errors='strict')
    myfilename = filename
    prelink_tmpfile = None
    try:
        if (calc_prelink and prelink_capable and
            is_prelinkable_elf(filename)):
            # Create non-prelinked temporary file to checksum.
            # Files rejected by prelink are summed in place.
            try:
                tmpfile_fd, prelink_tmpfile = tempfile.mkstemp()
                try:
                    retval = portage.process.spawn([PRELINK_BINARY,
                        "--verify", filename], fd_pipes={1: tmpfile_fd})
                finally:
                    os.close(tmpfile_fd)
                if retval == os.EX_OK:
                    myfilename = prelink_tmpfile
            except portage.exception.CommandNotFound:
                # This happens during uninstallation of prelink.
                prelink_capable = False
        try:
            if hashname not in hashfunc_map:
                raise portage.exception.DigestException(hashname +
                    " hash function not available (needs dev-python/pycrypto)")
            myhash, mysize = hashfunc_map[hashname](myfilename)
        except (OSError, IOError) as e:
            if e.errno in (errno.ENOENT, errno.ESTALE):
                raise portage.exception.FileNotFound(myfilename)
            elif e.errno == portage.exception.PermissionDenied.errno:
                raise portage.exception.PermissionDenied(myfilename)
            raise
        return myhash, mysize
    finally:
        # Always remove the temporary un-prelinked copy; a file that is
        # already gone is not an error.
        if prelink_tmpfile:
            try:
                os.unlink(prelink_tmpfile)
            except OSError as e:
                if e.errno != errno.ENOENT:
                    raise
                del e


def perform_multiple_checksums(filename, hashes=("MD5",), calc_prelink=0):
    """
    Run a group of checksums against a file.

    @param filename: File to run the checksums against
    @type filename: String
    @param hashes: The names of the checksum functions to run against
        the file
    @type hashes: Iterable of Strings
    @param calc_prelink: Whether or not to reverse prelink before running
        the checksum
    @type calc_prelink: Integer
    @rtype: Dictionary
    @return: A dictionary in the form:
        return_value[hash_name] = hash_result
        for each given checksum
    @raise portage.exception.DigestException: if a requested hash
        function is not registered
    """
    rVal = {}
    for x in hashes:
        if x not in hashfunc_map:
            raise portage.exception.DigestException(x+" hash function not available (needs dev-python/pycrypto or >=dev-lang/python-2.5)")
        rVal[x] = perform_checksum(filename, x, calc_prelink)[0]
    return rVal