Unverified commit bbcfc2ba authored by Tim O'Donnell, committed by GitHub

Merge pull request #114 from hammerlab/2017-11-13

Implement BLOSUM62 encoding
parents 1c3da71b ed9372c2
@@ -22,7 +22,7 @@ If you find MHCflurry useful in your research please cite:
> O'Donnell, T. et al., 2017. MHCflurry: open-source class I MHC binding affinity prediction. bioRxiv. Available at: http://www.biorxiv.org/content/early/2017/08/09/174243.
-## Setup
+## Setup (pip)
Install the package:
@@ -56,6 +56,32 @@ export KERAS_BACKEND=theano
You may also need to `pip install theano`.
## Setup (conda)
You can alternatively get up and running with a [conda](https://conda.io/docs/)
environment as follows:
```
conda create -q -n mhcflurry-env python=3.6 'tensorflow>=1.1.0'
source activate mhcflurry-env
```
Then continue as above:
```
pip install mhcflurry
mhcflurry-downloads fetch
```
If you wish to test your installation, you can install `nose` and run the tests
from a checkout:
```
pip install nose
nosetests .
```
## Making predictions from the command-line
@@ -103,7 +129,7 @@ notebook for an overview of the Python API, including fitting your own predictors
An ensemble of eight single-allele models was trained for each allele with at least
100 measurements in the training set (118 alleles). The models were trained on a
random 80% sample of the data for the allele and the remaining 20% was used for
-early stopping. All models use the same [architecture](downloads-generation/models_class1/hyperparameters.json). The
+early stopping. All models use the same [architecture](downloads-generation/models_class1/hyperparameters.yaml). The
predictions are taken to be the geometric mean of the nM binding affinity
predictions of the individual models. The training script is [here](downloads-generation/models_class1/GENERATE.sh).
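For intuition, here is a sketch of that geometric-mean combination (illustrative, not code from the repository; `per_model_nm` is a hypothetical list of the individual models' nM predictions):
```
import numpy

def ensemble_affinity(per_model_nm):
    # per_model_nm: list of arrays, one per model, each of shape (num_peptides,)
    # Geometric mean = exponential of the mean of the logs.
    stacked = numpy.stack(per_model_nm)
    return numpy.exp(numpy.log(stacked).mean(axis=0))
```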
...
@@ -25,11 +25,11 @@ cd $SCRATCH_DIR/$DOWNLOAD_NAME
mkdir models
-cp $SCRIPT_DIR/hyperparameters.json .
+cp $SCRIPT_DIR/hyperparameters.yaml .
time mhcflurry-class1-train-allele-specific-models \
    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.csv.bz2" \
-    --hyperparameters hyperparameters.json \
+    --hyperparameters hyperparameters.yaml \
    --out-models-dir models \
    --min-measurements-per-allele 200
...
[
    {
        "n_models": 12,
        "max_epochs": 500,
        "patience": 10,
        "early_stopping": true,
        "validation_split": 0.2,
        "random_negative_rate": 0.0,
        "random_negative_constant": 25,
        "use_embedding": false,
        "kmer_size": 15,
        "batch_normalization": false,
        "locally_connected_layers": [
            {
                "filters": 8,
                "activation": "tanh",
                "kernel_size": 3
            },
            {
                "filters": 8,
                "activation": "tanh",
                "kernel_size": 3
            }
        ],
        "activation": "relu",
        "output_activation": "sigmoid",
        "layer_sizes": [
            32
        ],
        "random_negative_affinity_min": 20000.0,
        "random_negative_affinity_max": 50000.0,
        "dense_layer_l1_regularization": 0.001,
        "dropout_probability": 0.0
    }
]
...
[{
    ##########################################
    # ENSEMBLE SIZE
    ##########################################
    "n_models": 12,
    ##########################################
    # OPTIMIZATION
    ##########################################
    "max_epochs": 500,
    "patience": 10,
    "early_stopping": true,
    "validation_split": 0.2,
    ##########################################
    # RANDOM NEGATIVE PEPTIDES
    ##########################################
    "random_negative_rate": 0.0,
    "random_negative_constant": 25,
    "random_negative_affinity_min": 20000.0,
    "random_negative_affinity_max": 50000.0,
    ##########################################
    # PEPTIDE REPRESENTATION
    ##########################################
    # One of "one-hot", "embedding", or "BLOSUM62".
    "peptide_amino_acid_encoding": "BLOSUM62",
    "use_embedding": false,  # maintained for backward compatibility
    "kmer_size": 15,
    ##########################################
    # NEURAL NETWORK ARCHITECTURE
    ##########################################
    "locally_connected_layers": [
        {
            "filters": 8,
            "activation": "tanh",
            "kernel_size": 3
        },
        {
            "filters": 8,
            "activation": "tanh",
            "kernel_size": 3
        }
    ],
    "activation": "relu",
    "output_activation": "sigmoid",
    "layer_sizes": [
        32
    ],
    "dense_layer_l1_regularization": 0.001,
    "batch_normalization": false,
    "dropout_probability": 0.0,
}]
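Moving from JSON to YAML is what makes the inline `#` comments above legal, since JSON has no comment syntax. A minimal sketch of loading the file (filename assumed), mirroring the `yaml.load` call the training command now uses:
```
import yaml

with open("hyperparameters.yaml") as fd:
    hyperparameters_lst = yaml.load(fd)

assert isinstance(hyperparameters_lst, list)
print(hyperparameters_lst[0]["peptide_amino_acid_encoding"])  # BLOSUM62
```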
...
@@ -17,7 +17,7 @@ from .class1_affinity_prediction.class1_neural_network import (
from .class1_affinity_prediction.class1_affinity_predictor import (
    Class1AffinityPredictor)
-__version__ = "0.9.2"
+__version__ = "0.9.3"
__all__ = [
    "Class1NeuralNetwork",
...
@@ -9,14 +9,17 @@ import keras.models
import keras.layers.pooling
import keras.regularizers
from keras.layers import Input
-import keras.layers.merge
+import keras.layers
from keras.layers.core import Dense, Flatten, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.normalization import BatchNormalization
from mhcflurry.hyperparameters import HyperparameterDefaults
-from ..encodable_sequences import EncodableSequences
+from ..encodable_sequences import (
+    EncodableSequences,
+    available_vector_encodings,
+    vector_encoding_length)
from ..regression_target import to_ic50, from_ic50
from ..common import random_peptides, amino_acid_distribution
@@ -35,6 +38,7 @@ class Class1NeuralNetwork(object):
    network_hyperparameter_defaults = HyperparameterDefaults(
        kmer_size=15,
        use_embedding=False,
        peptide_amino_acid_encoding="one-hot",
        embedding_input_dim=21,
        embedding_output_dim=8,
        pseudosequence_use_embedding=False,
@@ -256,20 +260,26 @@ class Class1NeuralNetwork(object):
        numpy.array
        """
        encoder = EncodableSequences.create(peptides)
-        if self.hyperparameters['use_embedding']:
+        if (self.hyperparameters['use_embedding'] or
+                self.hyperparameters['peptide_amino_acid_encoding'] == "embedding"):
            encoded = encoder.variable_length_to_fixed_length_categorical(
                max_length=self.hyperparameters['kmer_size'],
                **self.input_encoding_hyperparameter_defaults.subselect(
                    self.hyperparameters))
-        else:
-            encoded = encoder.variable_length_to_fixed_length_one_hot(
+        elif (
+                self.hyperparameters['peptide_amino_acid_encoding'] in
+                available_vector_encodings()):
+            encoded = encoder.variable_length_to_fixed_length_vector_encoding(
+                self.hyperparameters['peptide_amino_acid_encoding'],
                max_length=self.hyperparameters['kmer_size'],
                **self.input_encoding_hyperparameter_defaults.subselect(
                    self.hyperparameters))
        else:
            raise ValueError("Unsupported peptide_amino_acid_encoding: %s" %
                self.hyperparameters['peptide_amino_acid_encoding'])
        assert len(encoded) == len(peptides)
        return encoded
    @property
    def supported_peptide_lengths(self):
        """
@@ -302,7 +312,8 @@ class Class1NeuralNetwork(object):
        if self.hyperparameters['pseudosequence_use_embedding']:
            encoded = encoder.fixed_length_categorical()
        else:
-            encoded = encoder.fixed_length_one_hot()
+            raise NotImplementedError
+            # encoded = encoder.fixed_length_one_hot()
        assert len(encoded) == len(pseudosequences)
        return encoded
@@ -492,7 +503,8 @@ class Class1NeuralNetwork(object):
            pseudosequences_input = self.pseudosequence_to_network_input(
                allele_pseudosequences)
            x_dict['pseudosequence'] = pseudosequences_input
-        (predictions,) = numpy.array(self.network(borrow=True).predict(x_dict)).T
+        (predictions,) = numpy.array(
+            self.network(borrow=True).predict(x_dict), dtype="float64").T
        return to_ic50(predictions)
    def compile(self):
@@ -507,6 +519,7 @@ class Class1NeuralNetwork(object):
    def make_network(
            pseudosequence_length,
            kmer_size,
            peptide_amino_acid_encoding,
            use_embedding,
            embedding_input_dim,
            embedding_output_dim,
@@ -524,7 +537,7 @@ class Class1NeuralNetwork(object):
        """
        Helper function to make a keras network for class1 affinity prediction.
        """
-        if use_embedding:
+        if use_embedding or peptide_amino_acid_encoding == "embedding":
            peptide_input = Input(
                shape=(kmer_size,), dtype='int32', name='peptide')
            current_layer = Embedding(
@@ -535,7 +548,11 @@ class Class1NeuralNetwork(object):
                name="peptide_embedding")(peptide_input)
        else:
            peptide_input = Input(
-                shape=(kmer_size, 21), dtype='float32', name='peptide')
+                shape=(
+                    kmer_size,
+                    vector_encoding_length(peptide_amino_acid_encoding)),
+                dtype='float32',
+                name='peptide')
            current_layer = peptide_input
        inputs = [peptide_input]
@@ -609,4 +626,7 @@ class Class1NeuralNetwork(object):
            inputs=inputs,
            outputs=[output],
            name="predictor")
        print("*** ARCHITECTURE ***")
        model.summary()
        return model
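The peptide input's width is no longer hard-coded to 21 but looked up from the chosen encoding. A quick sketch of what the shape works out to (both built-in encodings happen to have width 21, the 20 amino acids plus X):
```
from mhcflurry.encodable_sequences import vector_encoding_length

kmer_size = 15
for name in ("one-hot", "BLOSUM62"):
    print(name, (kmer_size, vector_encoding_length(name)))  # both give (15, 21)
```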
...
@@ -5,7 +5,7 @@ Train Class1 single allele models.
import os
import sys
import argparse
-import json
+import yaml
import pandas
@@ -31,7 +31,7 @@ parser.add_argument(
    "--hyperparameters",
    metavar="FILE.json",
    required=True,
-    help="JSON of hyperparameters")
+    help="JSON or YAML of hyperparameters")
parser.add_argument(
    "--allele",
    default=None,
@@ -61,7 +61,7 @@ def run(argv=sys.argv[1:]):
    configure_logging(verbose=args.verbosity > 1)
-    hyperparameters_lst = json.load(open(args.hyperparameters))
+    hyperparameters_lst = yaml.load(open(args.hyperparameters))
    assert isinstance(hyperparameters_lst, list)
    print("Loaded hyperparameters list: %s" % str(hyperparameters_lst))
...
@@ -22,11 +22,75 @@ import math
import pandas
import numpy
from six import StringIO
import typechecks
from . import amino_acid
AMINO_ACIDS = list(amino_acid.COMMON_AMINO_ACIDS_WITH_UNKNOWN.keys())
BLOSUM62_MATRIX = pandas.read_table(StringIO("""
A R N D C Q E G H I L K M F P S T W Y V X
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 0
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 0
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 0
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 0
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 0
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 0
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 0
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 0
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 0
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 0
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 0
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 0
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 0
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 0
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 0
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 0
X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
"""), sep='\s+').loc[AMINO_ACIDS, AMINO_ACIDS]
assert (BLOSUM62_MATRIX == BLOSUM62_MATRIX.T).all().all()
ENCODING_DFS = {
    "BLOSUM62": BLOSUM62_MATRIX,
    "one-hot": pandas.DataFrame([
        [1 if i == j else 0 for i in range(len(AMINO_ACIDS))]
        for j in range(len(AMINO_ACIDS))
    ], index=AMINO_ACIDS, columns=AMINO_ACIDS)
}


def available_vector_encodings():
    """
    Return list of supported amino acid vector encodings.

    Returns
    -------
    list of string
    """
    return list(ENCODING_DFS)


def vector_encoding_length(name):
    """
    Return the length of the given vector encoding.

    Parameters
    ----------
    name : string

    Returns
    -------
    int
    """
    return ENCODING_DFS[name].shape[1]
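A minimal usage sketch of these helpers (the values follow from the definitions above):
```
from mhcflurry.encodable_sequences import (
    BLOSUM62_MATRIX, available_vector_encodings, vector_encoding_length)

print(sorted(available_vector_encodings()))  # ['BLOSUM62', 'one-hot']
print(vector_encoding_length("BLOSUM62"))    # 21: 20 amino acids plus X
print(BLOSUM62_MATRIX.loc["W", "W"])         # 11; the matrix is symmetric
```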
def index_encoding(sequences, letter_to_index_dict):
    """
@@ -47,38 +111,23 @@ def index_encoding(sequences, letter_to_index_dict):
    return result.values
-def one_hot_encoding(index_encoded, alphabet_size):
-    """
-    Given an n * k array of integers in the range [0, alphabet_size), return
-    an n * k * alphabet_size array where element (i, k, j) is 1 if element
-    (i, k) == j in the input array and zero otherwise.
-
-    Parameters
-    ----------
-    index_encoded : numpy.array of integers with shape (n, k)
-    alphabet_size : int
-
-    Returns
-    -------
-    numpy.array of integers of shape (n, k, alphabet_size)
-    """
-    alphabet_size = int(alphabet_size)
-    (num_sequences, sequence_length) = index_encoded.shape
-    result = numpy.zeros(
-        (num_sequences, sequence_length, alphabet_size),
-        dtype='int32')
-    # Transform the index encoded array into an array of indices into the
-    # flattened result, which we will set to 1.
-    flattened_indices = (
-        index_encoded +
-        (
-            sequence_length * alphabet_size * numpy.arange(num_sequences)
-        ).reshape((-1, 1)) +
-        numpy.tile(numpy.arange(sequence_length),
-            (num_sequences, 1)) * alphabet_size)
-    result.put(flattened_indices, 1)
-    return result
+def fixed_vectors_encoding(sequences, letter_to_vector_function):
+    """
+    Given a sequence of n strings all of length k, return an n * k * m array
+    where the (i, j)th element is letter_to_vector_function(sequences[i][j]).
+
+    Parameters
+    ----------
+    sequences : list of length n of strings of length k
+    letter_to_vector_function : function : string -> vector of length m
+
+    Returns
+    -------
+    numpy.array of integers with shape (n, k, m)
+    """
+    arr = numpy.array([list(s) for s in sequences])
+    result = numpy.vectorize(
+        letter_to_vector_function, signature='()->(n)')(arr)
+    return result
@@ -114,40 +163,6 @@ class EncodableSequences(object):
    def __len__(self):
        return len(self.sequences)
-    def fixed_length_categorical(self):
-        """
-        Returns a categorical encoding (i.e. integers 0 <= x < 21) of the
-        sequences, which must already be all the same length.
-        Returns
-        -------
-        numpy.array of integers
-        """
-        cache_key = ("categorical",)
-        if cache_key not in self.encoding_cache:
-            assert self.fixed_sequence_length
-            self.encoding_cache[cache_key] = index_encoding(
-                self.sequences, amino_acid.AMINO_ACID_INDEX)
-        return self.encoding_cache[cache_key]
-
-    def fixed_length_one_hot(self):
-        """
-        Returns a binary one-hot encoding of the sequences, which must already
-        be all the same length.
-        Returns
-        -------
-        numpy.array of integers
-        """
-        cache_key = ("one_hot",)
-        if cache_key not in self.encoding_cache:
-            assert self.fixed_sequence_length
-            encoded = self.categorical_encoding()
-            result = one_hot_encoding(
-                encoded, alphabet_size=len(amino_acid.AMINO_ACID_INDEX))
-            self.encoding_cache[cache_key] = result
-        return self.encoding_cache[cache_key]
    def variable_length_to_fixed_length_categorical(
            self, left_edge=4, right_edge=4, max_length=15):
        """
@@ -187,8 +202,8 @@ class EncodableSequences(object):
                fixed_length_sequences, amino_acid.AMINO_ACID_INDEX)
        return self.encoding_cache[cache_key]
-    def variable_length_to_fixed_length_one_hot(
-            self, left_edge=4, right_edge=4, max_length=15):
+    def variable_length_to_fixed_length_vector_encoding(
+            self, vector_encoding_name, left_edge=4, right_edge=4, max_length=15):
        """
        Encode variable-length sequences using a fixed-length encoding designed
        for preserving the anchor positions of class I peptides.
@@ -198,35 +213,43 @@ class EncodableSequences(object):
        Parameters
        ----------
        vector_encoding_name : string
            How to represent amino acids.
            One of "BLOSUM62", "one-hot", etc. Full list of supported vector
            encodings is given by available_vector_encodings().
        left_edge : int, size of fixed-position left side
        right_edge : int, size of the fixed-position right side
        max_length : sequence length of the resulting encoding

        Returns
        -------
-        binary numpy.array with shape (num sequences, max_length, 21)
+        numpy.array with shape (num sequences, max_length, m) where m is
+        vector_encoding_length(vector_encoding_name)
        """
        cache_key = (
-            "fixed_length_one_hot",
+            "fixed_length_vector_encoding",
+            vector_encoding_name,
            left_edge,
            right_edge,
            max_length)
        if cache_key not in self.encoding_cache:
-            encoded = self.variable_length_to_fixed_length_categorical(
-                left_edge=left_edge,
-                right_edge=right_edge,
-                max_length=max_length)
-            result = one_hot_encoding(
-                encoded, alphabet_size=len(amino_acid.AMINO_ACID_INDEX))
-            assert result.shape == (
-                len(self.sequences),
-                encoded.shape[1],
-                len(amino_acid.AMINO_ACID_INDEX))
+            fixed_length_sequences = [
+                self.sequence_to_fixed_length_string(
+                    sequence,
+                    left_edge=left_edge,
+                    right_edge=right_edge,
+                    max_length=max_length)
+                for sequence in self.sequences
+            ]
+            result = fixed_vectors_encoding(
+                fixed_length_sequences,
+                ENCODING_DFS[vector_encoding_name].loc.__getitem__)
+            assert result.shape[0] == len(self.sequences)
            self.encoding_cache[cache_key] = result
        return self.encoding_cache[cache_key]
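For example (a sketch: an 8-mer and an 11-mer are both mapped to 15-character fixed-length strings before vector encoding):
```
from mhcflurry.encodable_sequences import EncodableSequences

encoder = EncodableSequences.create(["SIINFEKL", "SIINFEKLQLE"])
encoded = encoder.variable_length_to_fixed_length_vector_encoding("BLOSUM62")
print(encoded.shape)  # (2, 15, 21)
```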
    @classmethod
    def sequence_to_fixed_length_string(
            klass, sequence, left_edge=4, right_edge=4, max_length=15):
...
@@ -30,14 +30,14 @@ try:
    with open(readme_filename, 'r') as f:
        readme = f.read()
except:
-    logging.warn("Failed to load %s" % readme_filename)
+    logging.warning("Failed to load %s" % readme_filename)
    readme = ""
try:
    import pypandoc
    readme = pypandoc.convert(readme, to='rst', format='md')
except:
-    logging.warn("Conversion of long_description from MD to RST failed")
+    logging.warning("Conversion of long_description from MD to RST failed")
    pass
with open('mhcflurry/__init__.py', 'r') as f:
@@ -51,12 +51,13 @@ if __name__ == '__main__':
        'six',
        'numpy>=1.11',
        'pandas>=0.13.1',
-        'Keras==2.0.8',
+        'Keras==2.0.9',
        'appdirs',
-        'tensorflow',
+        'tensorflow>=1.1.0',
        'scikit-learn',
        'typechecks',
        'mhcnames',
        'pyyaml',
    ]
    if PY2:
        # concurrent.futures is a standard library in Py3 but Py2
# concurrent.futures is a standard library in Py3 but Py2 # concurrent.futures is a standard library in Py3 but Py2
from mhcflurry import encodable_sequences
from nose.tools import eq_
from numpy.testing import assert_equal
import numpy

letter_to_index_dict = {
    'A': 0,
@@ -18,7 +19,13 @@ def test_index_and_one_hot_encoding():
        [0, 0, 0, 0],
        [0, 1, 2, 0],
    ])
-    one_hot = encodable_sequences.one_hot_encoding(index_encoding, 3)
+    one_hot = encodable_sequences.fixed_vectors_encoding(
+        index_encoding,
+        {
+            0: numpy.array([1, 0, 0]),
+            1: numpy.array([0, 1, 0]),
+            2: numpy.array([0, 0, 1]),
+        }.get)
    eq_(one_hot.shape, (2, 4, 3))
    assert_equal(
        one_hot[0],
...
@@ -53,7 +53,7 @@ def test_run():
    try:
        models_dir = tempfile.mkdtemp(prefix="mhcflurry-test-models")
        hyperparameters_filename = os.path.join(
-            models_dir, "hyperparameters.json")
+            models_dir, "hyperparameters.yaml")
        with open(hyperparameters_filename, "w") as fd:
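            # HYPERPARAMETERS is still written as JSON; JSON is in practice a
            # subset of YAML, so the training command's yaml.load parses it fine.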
            json.dump(HYPERPARAMETERS, fd)
...