from __future__ import print_function, division, absolute_import import collections import logging import sys import os import warnings import numpy import pandas from . import amino_acid def set_keras_backend(backend=None, gpu_device_nums=None, num_threads=None): """ Configure Keras backend to use GPU or CPU. Only tensorflow is supported. Parameters ---------- backend : string, optional one of 'tensorflow-default', 'tensorflow-cpu', 'tensorflow-gpu' gpu_device_nums : list of int, optional GPU devices to potentially use num_threads : int, optional Tensorflow threads to use """ os.environ["KERAS_BACKEND"] = "tensorflow" original_backend = backend if not backend: backend = "tensorflow-default" if gpu_device_nums is not None: os.environ["CUDA_VISIBLE_DEVICES"] = ",".join( [str(i) for i in gpu_device_nums]) if backend == "tensorflow-cpu" or gpu_device_nums == []: print("Forcing tensorflow/CPU backend.") os.environ["CUDA_VISIBLE_DEVICES"] = "" device_count = {'CPU': 1, 'GPU': 0} elif backend == "tensorflow-gpu": print("Forcing tensorflow/GPU backend.") device_count = {'CPU': 0, 'GPU': 1} elif backend == "tensorflow-default": print("Forcing tensorflow backend.") device_count = None else: raise ValueError("Unsupported backend: %s" % backend) import tensorflow from keras import backend as K if K.backend() == 'tensorflow': config = tensorflow.ConfigProto(device_count=device_count) config.gpu_options.allow_growth = True if num_threads: config.inter_op_parallelism_threads = num_threads config.intra_op_parallelism_threads = num_threads session = tensorflow.Session(config=config) K.set_session(session) else: if original_backend or gpu_device_nums or num_threads: warnings.warn( "Only tensorflow backend can be customized. Ignoring " " customization. Backend: %s" % K.backend()) def configure_logging(verbose=False): """ Configure logging module using defaults. Parameters ---------- verbose : boolean If true, output will be at level DEBUG, otherwise, INFO. """ level = logging.DEBUG if verbose else logging.INFO logging.basicConfig( format="%(asctime)s.%(msecs)d %(levelname)s %(module)s - %(funcName)s:" " %(message)s", datefmt="%Y-%m-%d %H:%M:%S", stream=sys.stderr, level=level) def amino_acid_distribution(peptides, smoothing=0.0): """ Compute the fraction of each amino acid across a collection of peptides. Parameters ---------- peptides : list of string smoothing : float, optional Small number (e.g. 0.01) to add to all amino acid fractions. The higher the number the more uniform the distribution. Returns ------- pandas.Series indexed by amino acids """ peptides = pandas.Series(peptides) aa_counts = pandas.Series(peptides.map(collections.Counter).sum()) normalized = aa_counts / aa_counts.sum() if smoothing: normalized += smoothing normalized /= normalized.sum() return normalized def random_peptides(num, length=9, distribution=None): """ Generate random peptides (kmers). Parameters ---------- num : int Number of peptides to return length : int Length of each peptide distribution : pandas.Series Maps 1-letter amino acid abbreviations to probabilities. If not specified a uniform distribution is used. Returns ---------- list of string """ if num == 0: return [] if distribution is None: distribution = pandas.Series( 1, index=sorted(amino_acid.COMMON_AMINO_ACIDS)) distribution /= distribution.sum() return [ ''.join(peptide_sequence) for peptide_sequence in numpy.random.choice( distribution.index, p=distribution.values, size=(int(num), int(length))) ] def positional_frequency_matrix(peptides): """ Given a set of peptides, calculate a length x amino acids frequency matrix. Parameters ---------- peptides : list of string All of same length Returns ------- pandas.DataFrame Index is position, columns are amino acids """ length = len(peptides[0]) assert all(len(peptide) == length for peptide in peptides) counts = pandas.DataFrame( index=[a for a in amino_acid.BLOSUM62_MATRIX.index if a != 'X'], columns=numpy.arange(1, length + 1), ) for i in range(length): counts[i + 1] = pandas.Series([p[i] for p in peptides]).value_counts() result = (counts / len(peptides)).fillna(0.0).T result.index.name = 'position' return result