Commit f73e73b2 authored by Tim O'Donnell

fix peptide encoding

parent efb02b5d
@@ -35,6 +35,7 @@ time python curate.py \
 bzip2 curated_training_data.csv
 cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
@@ -26,6 +26,7 @@ rm mhc_ligand_full.zip
 bzip2 mhc_ligand_full.csv
 cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
@@ -28,6 +28,7 @@ wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.20130222.mhci.pub
 wget --quiet https://dl.dropboxusercontent.com/u/3967524/bdata.2013.mhci.public.blind.1.txt
 cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
@@ -34,6 +34,7 @@ time mhcflurry-class1-train-allele-specific-models \
     --min-measurements-per-allele 100
 cp $SCRIPT_ABSOLUTE_PATH .
+bzip2 LOG.txt
 tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
 echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
@@ -11,10 +11,47 @@ import pandas
 import mhcnames
 
 from ..encodable_sequences import EncodableSequences
+from ..downloads import get_path
 from .class1_neural_network import Class1NeuralNetwork
 
 
+class LazyLoadingClass1NeuralNetwork(object):
+    @classmethod
+    def wrap(cls, instance):
+        if isinstance(instance, cls):
+            return instance
+        elif isinstance(instance, Class1NeuralNetwork):
+            return cls(model=instance)
+        raise TypeError("Unsupported type: %s" % instance)
+
+    @classmethod
+    def wrap_list(cls, lst):
+        return [
+            cls.wrap(instance)
+            for instance in lst
+        ]
+
+    def __init__(self, model=None, config=None, weights_filename=None):
+        if model is None:
+            assert config is not None
+            assert weights_filename is not None
+        else:
+            assert config is None
+            assert weights_filename is None
+        self.model = model
+        self.config = config
+        self.weights_filename = weights_filename
+
+    @property
+    def instance(self):
+        if self.model is None:
+            self.model = Class1NeuralNetwork.from_config(self.config)
+            self.model.restore_weights(self.weights_filename)
+        return self.model
+
+
 class Class1AffinityPredictor(object):
     def __init__(
             self,
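Note on the lazy-loading pattern introduced above: LazyLoadingClass1NeuralNetwork defers Keras model construction and weight restoration until the "instance" property is first read, so loading a large ensemble from a manifest stays cheap until a model is actually used. A minimal self-contained sketch of the same idea; FakeModel here is a stand-in for Class1NeuralNetwork, not part of the commit:

    # Sketch of the lazy-load pattern. FakeModel mimics the two
    # Class1NeuralNetwork methods the wrapper relies on.
    class FakeModel(object):
        @classmethod
        def from_config(cls, config):
            return cls()

        def restore_weights(self, filename):
            self.weights_loaded_from = filename

    class LazyModel(object):
        def __init__(self, config=None, weights_filename=None):
            self.model = None
            self.config = config
            self.weights_filename = weights_filename

        @property
        def instance(self):
            # Build the model and load its weights only on first access.
            if self.model is None:
                self.model = FakeModel.from_config(self.config)
                self.model.restore_weights(self.weights_filename)
            return self.model

    lazy = LazyModel(config={}, weights_filename="weights_EXAMPLE.npz")
    assert lazy.model is None      # nothing constructed or loaded yet
    model = lazy.instance          # first access triggers build + restore
    assert model.weights_loaded_from == "weights_EXAMPLE.npz"
    assert lazy.instance is model  # later accesses reuse the same object

The wrap/wrap_list classmethods let the rest of the predictor treat in-memory models and not-yet-loaded on-disk models uniformly.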
@@ -31,9 +68,11 @@ class Class1AffinityPredictor(object):
         if class1_pan_allele_models:
             assert allele_to_pseudosequence, "Pseudosequences required"
 
-        self.allele_to_allele_specific_models = (
-            allele_to_allele_specific_models)
-        self.class1_pan_allele_models = class1_pan_allele_models
+        self.allele_to_allele_specific_models = dict(
+            (k, LazyLoadingClass1NeuralNetwork.wrap_list(v))
+            for (k, v) in allele_to_allele_specific_models.items())
+        self.class1_pan_allele_models = (
+            LazyLoadingClass1NeuralNetwork.wrap_list(class1_pan_allele_models))
         self.allele_to_pseudosequence = allele_to_pseudosequence
 
         if manifest_df is None:
@@ -61,7 +100,7 @@ class Class1AffinityPredictor(object):
         for (_, row) in sub_manifest_df.iterrows():
             weights_path = self.weights_path(models_dir, row.model_name)
-            row.model.save_weights(weights_path)
+            row.model.instance.save_weights(weights_path)
             print("Wrote: %s" % weights_path)
 
         write_manifest_df = self.manifest_df[[
@@ -75,7 +114,7 @@ class Class1AffinityPredictor(object):
     def model_name(allele, num):
         random_string = hashlib.sha1(
             str(time.time()).encode()).hexdigest()[:16]
-        return "%s-%d-%s" % (allele, num, random_string)
+        return "%s-%d-%s" % (allele.upper(), num, random_string)
 
     @staticmethod
     def weights_path(models_dir, model_name):
@@ -84,9 +123,11 @@ class Class1AffinityPredictor(object):
             "weights_%s.%s" % (
                 model_name, Class1NeuralNetwork.weights_filename_extension))
 
     @staticmethod
-    def load(models_dir, max_models=None):
+    def load(models_dir=None, max_models=None):
+        if models_dir is None:
+            models_dir = get_path("models_class1", "models")
+
         manifest_path = join(models_dir, "manifest.csv")
         manifest_df = pandas.read_csv(manifest_path, nrows=max_models)
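With this change, load() with no arguments resolves the models directory through the downloads machinery. A usage sketch, assuming the models_class1 download is installed (the predict call mirrors the new tests later in this commit):

    from mhcflurry import Class1AffinityPredictor

    # With no models_dir, load() falls back to
    # get_path("models_class1", "models"): the downloaded model release.
    predictor = Class1AffinityPredictor.load()
    ic50s = predictor.predict(allele="HLA-A*02:01", peptides=["SLYNTVATL"])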
@@ -94,13 +135,11 @@ class Class1AffinityPredictor(object):
         class1_pan_allele_models = []
         all_models = []
         for (_, row) in manifest_df.iterrows():
-            model = Class1NeuralNetwork.from_config(
-                json.loads(row.config_json))
-            weights_path = Class1AffinityPredictor.weights_path(
-                models_dir, row.model_name)
-            print("Loading model weights: %s" % weights_path)
-            model.restore_weights(weights_path)
+            model = LazyLoadingClass1NeuralNetwork(
+                config=json.loads(row.config_json),
+                weights_filename=Class1AffinityPredictor.weights_path(
+                    models_dir, row.model_name)
+            )
             if row.allele == "pan-class1":
                 class1_pan_allele_models.append(model)
             else:
@@ -157,17 +196,18 @@ class Class1AffinityPredictor(object):
         models_list = []
         for (i, model) in enumerate(models):
+            lazy_model = LazyLoadingClass1NeuralNetwork.wrap(model)
             model_name = self.model_name(allele, i)
             models_list.append(model)  # models is a generator
             row = pandas.Series(collections.OrderedDict([
                 ("model_name", model_name),
                 ("allele", allele),
                 ("config_json", json.dumps(model.get_config())),
-                ("model", model),
+                ("model", lazy_model),
             ])).to_frame().T
             self.manifest_df = pandas.concat(
                 [self.manifest_df, row], ignore_index=True)
-            self.allele_to_allele_specific_models[allele].append(model)
+            self.allele_to_allele_specific_models[allele].append(lazy_model)
             if models_dir_for_save:
                 self.save(
                     models_dir_for_save, model_names_to_write=[model_name])
@@ -191,16 +231,18 @@ class Class1AffinityPredictor(object):
             architecture_hyperparameters=architecture_hyperparameters,
             peptides=peptides,
             affinities=affinities,
-            allele_pseudosequences=allele_pseudosequences)
+            allele_pseudosequences=allele_pseudosequences,
+            verbose=verbose)
 
         for (i, model) in enumerate(models):
+            lazy_model = LazyLoadingClass1NeuralNetwork.wrap(model)
             model_name = self.model_name("pan-class1", i)
-            self.class1_pan_allele_models.append(model)
+            self.class1_pan_allele_models.append(lazy_model)
             row = pandas.Series(collections.OrderedDict([
                 ("model_name", model_name),
                 ("allele", "pan-class1"),
                 ("config_json", json.dumps(model.get_config())),
-                ("model", model),
+                ("model", lazy_model),
             ])).to_frame().T
             self.manifest_df = pandas.concat(
                 [self.manifest_df, row], ignore_index=True)
@@ -265,7 +307,7 @@ class Class1AffinityPredictor(object):
             encodable_peptides = EncodableSequences.create(
                 df.peptide.values)
             for (i, model) in enumerate(self.class1_pan_allele_models):
-                df["model_pan_%d" % i] = model.predict(
+                df["model_pan_%d" % i] = model.instance.predict(
                     encodable_peptides,
                     allele_pseudosequences=allele_pseudosequences)
@@ -276,8 +318,9 @@ class Class1AffinityPredictor(object):
                     df.ix[mask].peptide.values)
                 models = self.allele_to_allele_specific_models.get(allele, [])
                 for (i, model) in enumerate(models):
-                    df.loc[mask, "model_single_%d" % i] = model.predict(
-                        allele_peptides)
+                    df.loc[
+                        mask, "model_single_%d" % i
+                    ] = model.instance.predict(allele_peptides)
 
         # Geometric mean
         df_predictions = df[
@@ -175,7 +175,7 @@ class Class1NeuralNetwork(object):
                     self.hyperparameters['random_negative_constant'])
         num_random_negative = pandas.Series(num_random_negative)
         logging.info("Random negative counts per length:\n%s" % (
-            str(num_random_negative)))
+            str(num_random_negative.to_dict())))
 
         aa_distribution = None
         if self.hyperparameters['random_negative_match_distribution']:
@@ -185,7 +185,7 @@ class Class1NeuralNetwork(object):
                     'random_negative_distribution_smoothing'])
             logging.info(
                 "Using amino acid distribution for random negative:\n%s" % (
-                    str(aa_distribution)))
+                    str(aa_distribution.to_dict())))
 
         y_values = from_ic50(affinities)
         assert numpy.isnan(y_values).sum() == 0, numpy.isnan(y_values).sum()
@@ -224,6 +224,8 @@ class Class1NeuralNetwork(object):
             sample_weights_with_random_negatives = numpy.concatenate([
                 numpy.ones(int(num_random_negative.sum())),
                 sample_weights])
+        else:
+            sample_weights_with_random_negatives = None
 
         val_losses = []
         min_val_loss_iteration = None
@@ -239,12 +241,10 @@ class Class1NeuralNetwork(object):
                         count,
                         length=length,
                         distribution=aa_distribution))
-            random_negative_peptides_encodable = (
-                EncodableSequences.create(
-                    random_negative_peptides_list))
             random_negative_peptides_encoding = (
                 self.peptides_to_network_input(
-                    random_negative_peptides_encodable))
+                    random_negative_peptides_list))
 
             x_dict_with_random_negatives = {
                 "peptide": numpy.concatenate([
                     random_negative_peptides_encoding,
@@ -263,9 +263,8 @@ class Class1NeuralNetwork(object):
                 shuffle=True,
                 verbose=verbose,
                 epochs=1,
-                validation_split=self.hyperparameters[
-                    'validation_split'],
-                sample_weight=sample_weights)
+                validation_split=self.hyperparameters['validation_split'],
+                sample_weight=sample_weights_with_random_negatives)
 
             for (key, value) in fit_history.history.items():
                 self.loss_history[key].extend(value)
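Note on the sample_weight fix above: the training batch is built by prepending the random negative peptides to the measured data, so the weight vector handed to fit must be extended the same way. Passing the original sample_weights, as the old code did, has the wrong length whenever random negatives are generated. A minimal sketch of the required alignment, assuming random negatives are weighted 1.0 as in the -224 hunk above:

    import numpy

    # Toy measured data: three affinities with per-example weights.
    sample_weights = numpy.array([0.5, 2.0, 1.0])
    num_random_negative = 4  # random negatives are prepended to the batch

    # Mirrors the concatenation in the diff: a weight of 1.0 for each
    # random negative, followed by the original weights, in the same
    # order as the peptides in x_dict_with_random_negatives.
    sample_weights_with_random_negatives = numpy.concatenate([
        numpy.ones(num_random_negative),
        sample_weights])

    assert len(sample_weights_with_random_negatives) == (
        num_random_negative + len(sample_weights))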
@@ -65,8 +65,17 @@ def one_hot_encoding(index_encoded, alphabet_size):
     result = numpy.zeros(
         (num_sequences, sequence_length, alphabet_size),
         dtype='int32')
-    for position in range(sequence_length):
-        result[:, position, index_encoded[:, position]] = 1
+
+    # Transform the index encoded array into an array of indices into the
+    # flattened result, which we will set to 1.
+    flattened_indices = (
+        index_encoded +
+        (
+            sequence_length * alphabet_size * numpy.arange(num_sequences)
+        ).reshape((-1, 1)) +
+        numpy.tile(numpy.arange(sequence_length),
+                   (num_sequences, 1)) * alphabet_size)
+    result.put(flattened_indices, 1)
     return result
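Note on the encoding fix above (this is the bug the commit message refers to): the old loop combines a full slice on the sequence axis with a fancy index on the alphabet axis, so result[:, position, index_encoded[:, position]] = 1 marks every sequence with every letter that occurs at that position in any sequence. The replacement computes, for entry (i, j), the C-order flat index i * sequence_length * alphabet_size + j * alphabet_size + index_encoded[i, j] and sets exactly those cells. A self-contained check on the "AAAA"/"ABCA" example from the new test below:

    import numpy
    from numpy.testing import assert_equal

    # "AAAA" and "ABCA" with alphabet A=0, B=1, C=2 (as in the new test).
    index_encoded = numpy.array([[0, 0, 0, 0],
                                 [0, 1, 2, 0]])
    num_sequences, sequence_length = index_encoded.shape
    alphabet_size = 3

    # Old (buggy) version: the fancy index on the last axis applies to
    # every sequence, so "AAAA" wrongly gets B and C bits set.
    buggy = numpy.zeros(
        (num_sequences, sequence_length, alphabet_size), dtype='int32')
    for position in range(sequence_length):
        buggy[:, position, index_encoded[:, position]] = 1
    assert buggy[0, 1, 1] == 1  # "AAAA" marked as having B: wrong

    # Fixed version, equivalent to the result.put(...) code above
    # (broadcasting numpy.arange(sequence_length) replaces numpy.tile).
    fixed = numpy.zeros(
        (num_sequences, sequence_length, alphabet_size), dtype='int32')
    flat = (
        index_encoded +
        (sequence_length * alphabet_size *
         numpy.arange(num_sequences)).reshape((-1, 1)) +
        numpy.arange(sequence_length) * alphabet_size)
    fixed.put(flat, 1)
    eye = numpy.eye(alphabet_size, dtype='int32')
    assert_equal(fixed[0], eye[[0, 0, 0, 0]])
    assert_equal(fixed[1], eye[[0, 1, 2, 0]])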
from mhcflurry import encodable_sequences

from nose.tools import eq_
from numpy.testing import assert_equal

letter_to_index_dict = {
    'A': 0,
    'B': 1,
    'C': 2,
}


def test_index_and_one_hot_encoding():
    index_encoding = encodable_sequences.index_encoding(
        ["AAAA", "ABCA"], letter_to_index_dict)
    assert_equal(
        index_encoding,
        [
            [0, 0, 0, 0],
            [0, 1, 2, 0],
        ])
    one_hot = encodable_sequences.one_hot_encoding(index_encoding, 3)
    eq_(one_hot.shape, (2, 4, 3))
    assert_equal(
        one_hot[0],
        [
            [1, 0, 0],
            [1, 0, 0],
            [1, 0, 0],
            [1, 0, 0],
        ])
    assert_equal(
        one_hot[1],
        [
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 0],
        ])
@@ -12,27 +12,35 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import cProfile
-import mhcflurry
+import pandas
+
 import mhcflurry.class1_affinity_prediction
-import mhcflurry.class1_allele_specific_ensemble
+from mhcflurry.downloads import get_path
+from mhcflurry import Class1AffinityPredictor
 
 predictors = [
-    mhcflurry.class1_affinity_prediction.get_downloaded_predictor(),
-    mhcflurry.class1_allele_specific_ensemble.get_downloaded_predictor(),
+    mhcflurry.class1_affinity_prediction.Class1AffinityPredictor.load(),
 ]
 
-def predict_and_check(allele, peptide, expected_range=(0, 500)):
+def predict_and_check(
+        allele,
+        peptide,
+        predictors=predictors,
+        expected_range=(0, 500)):
     for predictor in predictors:
-        (prediction,) = predictor.predict_for_allele(allele, [peptide])
-        assert prediction >= expected_range[0], (predictor, prediction)
-        assert prediction <= expected_range[1], (predictor, prediction)
+        def debug():
+            print("\n%s" % (
+                predictor.predict_to_dataframe(
+                    peptides=[peptide],
+                    allele=allele,
+                    include_individual_model_predictions=True)))
+        (prediction,) = predictor.predict(allele=allele, peptides=[peptide])
+        assert prediction >= expected_range[0], (predictor, prediction, debug())
+        assert prediction <= expected_range[1], (predictor, prediction, debug())
 
-def test_A1_Titin_epitope():
+def test_A1_Titin_epitope_downloaded_models():
     # Test the A1 Titin epitope ESDPIVAQY from
     # Identification of a Titin-Derived HLA-A1-Presented Peptide
     # as a Cross-Reactive Target for Engineered MAGE A3-Directed
@@ -40,7 +48,7 @@ def test_A1_Titin_epitope():
     predict_and_check("HLA-A*01:01", "ESDPIVAQY")
 
-def test_A1_MAGE_epitope():
+def test_A1_MAGE_epitope_downloaded_models():
     # Test the A1 MAGE epitope EVDPIGHLY from
     # Identification of a Titin-Derived HLA-A1-Presented Peptide
     # as a Cross-Reactive Target for Engineered MAGE A3-Directed
@@ -48,13 +56,66 @@ def test_A1_MAGE_epitope():
     predict_and_check("HLA-A*01:01", "EVDPIGHLY")
 
-def test_A2_HIV_epitope():
+def test_A1_trained_models():
+    allele = "HLA-A*01:01"
+    df = pandas.read_csv(
+        get_path(
+            "data_curated", "curated_training_data.csv.bz2"))
+    df = df.ix[
+        (df.allele == allele) &
+        (df.peptide.str.len() >= 8) &
+        (df.peptide.str.len() <= 15)
+    ]
+
+    hyperparameters = {
+        "max_epochs": 500,
+        "patience": 10,
+        "early_stopping": True,
+        "validation_split": 0.2,
+        "random_negative_rate": 0.0,
+        "random_negative_constant": 25,
+        "use_embedding": False,
+        "kmer_size": 15,
+        "batch_normalization": False,
+        "locally_connected_layers": [
+            {
+                "filters": 8,
+                "activation": "tanh",
+                "kernel_size": 3
+            },
+            {
+                "filters": 8,
+                "activation": "tanh",
+                "kernel_size": 3
+            }
+        ],
+        "activation": "relu",
+        "output_activation": "sigmoid",
+        "layer_sizes": [
+            32
+        ],
+        "random_negative_affinity_min": 20000.0,
+        "random_negative_affinity_max": 50000.0,
+        "dense_layer_l1_regularization": 0.001,
+        "dropout_probability": 0.0
+    }
+
+    predictor = Class1AffinityPredictor()
+    predictor.fit_allele_specific_predictors(
+        n_models=2,
+        architecture_hyperparameters=hyperparameters,
+        allele=allele,
+        peptides=df.peptide.values,
+        affinities=df.measurement_value.values,
+    )
+
+    predict_and_check("HLA-A*01:01", "EVDPIGHLY", predictors=[predictor])
+
+def test_A2_HIV_epitope_downloaded_models():
     # Test the A2 HIV epitope SLYNTVATL from
     #    The HIV-1 HLA-A2-SLYNTVATL Is a Help-Independent CTL Epitope
     predict_and_check("HLA-A*02:01", "SLYNTVATL")
-
-if __name__ == "__main__":
-    test_A1_Titin_epitope()
-    test_A1_MAGE_epitope()
-    test_A2_HIV_epitope()
# Copyright (c) 2016. Mount Sinai School of Medicine
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from mhcflurry.peptide_encoding import (
    all_kmers,
    extend_peptide,
    shorten_peptide,
    fixed_length_from_many_peptides,
)

from nose.tools import eq_


def test_all_kmers():
    kmers = all_kmers(2, alphabet=["A", "B"])
    assert len(kmers) == 4, kmers
    eq_(set(kmers), {"AA", "AB", "BA", "BB"})


def test_all_kmers_string_alphabet():
    kmers = all_kmers(2, alphabet="AB")
    assert len(kmers) == 4, kmers
    eq_(set(kmers), {"AA", "AB", "BA", "BB"})


def test_extend_peptide_all_positions():
    # Insert 0 or 1 at every position.
    results = extend_peptide(
        "111",
        desired_length=4,
        start_offset=0,
        end_offset=0,
        insert_amino_acid_letters="01")
    expected = [
        "0111",
        "1111",
        "1011",
        "1111",
        "1101",
        "1111",
        "1110",
        "1111",
    ]
    eq_(results, expected)
def test_shorten_peptide_all_positions():
    # Delete one character at every position.
    results = shorten_peptide(
        "012",
        desired_length=2,
        start_offset=0,
        end_offset=0,
        insert_amino_acid_letters="012")
    expected = [
        "12",
        "02",
        "01",
    ]
    eq_(results, expected)


def test_shorten_peptide_all_positions_except_first():
    # Delete one character at every position except the first.
    results = shorten_peptide(
        "012",
        desired_length=2,
        start_offset=1,
        end_offset=0,
        insert_amino_acid_letters="012")
    expected = [
        "02",
        "01",
    ]
    eq_(results, expected)


def test_shorten_peptide_all_positions_except_last():
    # Delete one character at every position except the last.
    results = shorten_peptide(
        "012",
        desired_length=2,
        start_offset=0,
        end_offset=1,
        insert_amino_acid_letters="012")
    expected = [
        "12",
        "02",
    ]
    eq_(results, expected)
def test_fixed_length_from_many_peptides():
    kmers, original_indices, counts = fixed_length_from_many_peptides(
        peptides=["ABC", "A"],
        desired_length=2,
        start_offset_extend=0,
        end_offset_extend=0,
        start_offset_shorten=0,
        end_offset_shorten=0,
        insert_amino_acid_letters="ABC")
    print(kmers)
    print(original_indices)
    print(counts)
    eq_(len(kmers), len(original_indices))
    eq_(len(kmers), len(counts))
    eq_(kmers, ["BC", "AC", "AB", "AA", "BA", "CA", "AA", "AB", "AC"])
    eq_(original_indices, [0] * 3 + [1] * 6)
    eq_(counts, [3, 3, 3, 6, 6, 6, 6, 6, 6])
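Note on the values asserted here: original_indices[i] identifies the source peptide of kmers[i], and counts[i] is the total number of kmers derived from that same source peptide. That is why the three 2-mers shortened from "ABC" each carry count 3, while the six extensions of "A" each carry count 6. Assuming that reading of the API, the counts column can be re-derived from original_indices alone:

    import collections

    original_indices = [0] * 3 + [1] * 6
    per_source = collections.Counter(original_indices)
    counts = [per_source[i] for i in original_indices]
    assert counts == [3, 3, 3, 6, 6, 6, 6, 6, 6]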