from __future__ import print_function, division, absolute_import
import collections
import logging
import os
import sys
import warnings

import numpy
import pandas

from . import amino_acid

def set_keras_backend(backend=None, gpu_device_nums=None, num_threads=None):
    """
    Configure Keras backend to use GPU or CPU. Only tensorflow is supported.

    Parameters
    ----------
    backend : string, optional
        one of 'tensorflow-default', 'tensorflow-cpu', 'tensorflow-gpu'

    gpu_device_nums : list of int, optional
        GPU devices to potentially use
    num_threads : int, optional
        Tensorflow threads to use

    """
    os.environ["KERAS_BACKEND"] = "tensorflow"

    original_backend = backend

    if not backend:
        backend = "tensorflow-default"

    if gpu_device_nums is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(
            [str(i) for i in gpu_device_nums])

    if backend == "tensorflow-cpu" or gpu_device_nums == []:
        print("Forcing tensorflow/CPU backend.")
        os.environ["CUDA_VISIBLE_DEVICES"] = ""
        device_count = {'CPU': 1, 'GPU': 0}
    elif backend == "tensorflow-gpu":
        print("Forcing tensorflow/GPU backend.")
        device_count = {'CPU': 0, 'GPU': 1}
    elif backend == "tensorflow-default":
        print("Forcing tensorflow backend.")
        device_count = None
    else:
        raise ValueError("Unsupported backend: %s" % backend)

    import tensorflow
    from keras import backend as K
    if K.backend() == 'tensorflow':
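        # Build a tensorflow session that honors the requested device counts and
        # thread limits, and install it as the Keras session.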
        config = tensorflow.ConfigProto(device_count=device_count)
        config.gpu_options.allow_growth = True
        if num_threads:
            config.inter_op_parallelism_threads = num_threads
            config.intra_op_parallelism_threads = num_threads
        session = tensorflow.Session(config=config)
        K.set_session(session)
    else:
        if original_backend or gpu_device_nums or num_threads:
            warnings.warn(
                "Only tensorflow backend can be customized. Ignoring "
                "customization. Backend: %s" % K.backend())
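

# Example usage (illustrative sketch, not part of the original module): force the
# tensorflow/CPU backend and cap tensorflow at four threads before any Keras
# model is constructed.
#
#     set_keras_backend("tensorflow-cpu", num_threads=4)
#
# Passing gpu_device_nums=[0] instead would restrict tensorflow to the first GPU
# via CUDA_VISIBLE_DEVICES.
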
def configure_logging(verbose=False):
    """
    Configure logging module using defaults.

    Parameters
    ----------
    verbose : boolean
        If true, output will be at level DEBUG; otherwise INFO.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logging.basicConfig(
        format="%(asctime)s.%(msecs)d %(levelname)s %(module)s - %(funcName)s:"
        " %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
        stream=sys.stderr,
        level=level)
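
# Example usage (illustrative): log at DEBUG level to stderr.
#
#     configure_logging(verbose=True)
#     logging.debug("now visible")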


def amino_acid_distribution(peptides, smoothing=0.0):
    """
    Compute the fraction of each amino acid across a collection of peptides.
    
    Parameters
    ----------
    peptides : list of string
    smoothing : float, optional
        Small number (e.g. 0.01) to add to all amino acid fractions. The higher
        the number, the more uniform the distribution.

    Returns
    -------
    pandas.Series indexed by amino acids
    """
    peptides = pandas.Series(peptides)
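    # Sum the per-peptide residue Counters, then normalize to fractions.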
    aa_counts = pandas.Series(peptides.map(collections.Counter).sum())
    normalized = aa_counts / aa_counts.sum()
    if smoothing:
        normalized += smoothing
        normalized /= normalized.sum()
    return normalized
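
# Illustrative example (hypothetical peptides, not from the original source):
#
#     dist = amino_acid_distribution(["SIINFEKL", "SIINFEKV"])
#     # dist is a pandas.Series of per-residue fractions, e.g. dist["I"] == 0.25
#     # and dist["S"] == 0.125 for these two 8-mers.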


def random_peptides(num, length=9, distribution=None):
    """
    Generate random peptides (kmers).

    Parameters
    ----------
    num : int
        Number of peptides to return

    length : int
        Length of each peptide

    distribution : pandas.Series
        Maps 1-letter amino acid abbreviations to probabilities. If not
        specified, a uniform distribution is used.

    Returns
    -------
    list of string

    """
    if num == 0:
        return []
    if distribution is None:
        distribution = pandas.Series(
            1, index=sorted(amino_acid.COMMON_AMINO_ACIDS))
        distribution /= distribution.sum()

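    # Draw one residue per position according to `distribution` and join each
    # row of draws into a peptide string.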
    return [
        ''.join(peptide_sequence)
        for peptide_sequence in
        numpy.random.choice(
            distribution.index,
            p=distribution.values,
            size=(int(num), int(length)))
    ]
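
# Illustrative example: sample random 9-mers, optionally biased toward a target
# amino acid distribution such as one computed with amino_acid_distribution above.
#
#     uniform = random_peptides(5, length=9)
#     dist = amino_acid_distribution(["SIINFEKL", "SIINFEKV"])
#     biased = random_peptides(5, length=8, distribution=dist)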


def positional_frequency_matrix(peptides):
    """
    Given a set of peptides, calculate a length x amino acids frequency matrix.

    Parameters
    ----------
    peptides : list of string
        All of same length

    Returns
    -------
    pandas.DataFrame
        Index is position, columns are amino acids
    """
    length = len(peptides[0])
    assert all(len(peptide) == length for peptide in peptides)
    counts = pandas.DataFrame(
        index=[a for a in amino_acid.BLOSUM62_MATRIX.index if a != 'X'],
        columns=numpy.arange(1, length + 1),
    )
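    # Count residues at each (1-based) position, then normalize to frequencies.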
    for i in range(length):
        counts[i + 1] = pandas.Series([p[i] for p in peptides]).value_counts()
    result = (counts / len(peptides)).fillna(0.0).T
    result.index.name = 'position'
    return result
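

# Illustrative example (hypothetical peptides): position-specific frequencies for
# a set of equal-length 8-mers.
#
#     pfm = positional_frequency_matrix(["SIINFEKL", "SIINFEKV", "SIINFEKL"])
#     # pfm.loc[8, "L"] == 2/3 and pfm.loc[8, "V"] == 1/3; every other position
#     # has a single residue at frequency 1.0.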