Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mhc_rank
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Patrick Skillman-Lawrence
mhc_rank
Commits
057e272a
Commit
057e272a
authored
7 years ago
by
Tim O'Donnell
Browse files
Options
Downloads
Patches
Plain Diff
docs update
parent
94f4c437
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+291
-19
291 additions, 19 deletions
...y/class1_affinity_prediction/class1_affinity_predictor.py
test/test_known_class1_epitopes.py
+27
-5
27 additions, 5 deletions
test/test_known_class1_epitopes.py
with
318 additions
and
24 deletions
mhcflurry/class1_affinity_prediction/class1_affinity_predictor.py
+
291
−
19
View file @
057e272a
...
...
@@ -17,12 +17,38 @@ from .class1_neural_network import Class1NeuralNetwork
class
Class1AffinityPredictor
(
object
):
"""
High-level interface for peptide/MHC I binding affinity prediction.
This is the class most users will want to use.
This class delegates to one or more `Class1NeuralNetwork` instances.
It supports prediction across multiple alleles using ensembles of single-
or pan-allele predictors.
"""
def
__init__
(
self
,
allele_to_allele_specific_models
=
None
,
class1_pan_allele_models
=
None
,
allele_to_pseudosequence
=
None
,
manifest_df
=
None
):
"""
Parameters
----------
allele_to_allele_specific_models : dict of string -> list of Class1NeuralNetwork
Ensemble of single-allele models to use for each allele.
class1_pan_allele_models : list of Class1NeuralNetwork
Ensemble of pan-allele models.
allele_to_pseudosequence : dict of string -> string
Required only if class1_pan_allele_models is specified.
manifest_df : pandas.DataFrame, optional
Must have columns: model_name, allele, config_json, model.
Only required if you want to update an existing serialization of a
Class1AffinityPredictor.
"""
if
allele_to_allele_specific_models
is
None
:
allele_to_allele_specific_models
=
{}
...
...
@@ -40,14 +66,40 @@ class Class1AffinityPredictor(object):
self
.
allele_to_pseudosequence
=
allele_to_pseudosequence
if
manifest_df
is
None
:
manifest_df
=
pandas
.
DataFrame
()
manifest_df
[
"
model_name
"
]
=
[]
manifest_df
[
"
allele
"
]
=
[]
manifest_df
[
"
config_json
"
]
=
[]
manifest_df
[
"
model
"
]
=
[]
rows
=
[]
for
(
i
,
model
)
in
enumerate
(
self
.
class1_pan_allele_models
):
rows
.
append
((
self
.
model_name
(
"
pan-class1
"
,
i
),
"
pan-class1
"
,
json
.
dumps
(
model
.
instance
.
get_config
()),
model
))
for
(
allele
,
models
)
in
self
.
allele_to_allele_specific_models
.
items
():
for
(
i
,
model
)
in
enumerate
(
models
):
rows
.
append
((
self
.
model_name
(
allele
,
i
),
allele
,
json
.
dumps
(
model
.
instance
.
get_config
()),
model
))
manifest_df
=
pandas
.
DataFrame
(
rows
,
columns
=
[
"
model_name
"
,
"
allele
"
,
"
config_json
"
,
"
model
"
])
self
.
manifest_df
=
manifest_df
def
save
(
self
,
models_dir
,
model_names_to_write
=
None
):
"""
Serialize the predictor to a directory on disk.
Parameters
----------
models_dir : string
Path to directory
model_names_to_write : list of string, optional
Only write the weights for the specified models. Useful for
incremental updates during training.
"""
num_models
=
len
(
self
.
class1_pan_allele_models
)
+
sum
(
len
(
v
)
for
v
in
self
.
allele_to_allele_specific_models
.
values
())
assert
len
(
self
.
manifest_df
)
==
num_models
,
(
...
...
@@ -74,21 +126,23 @@ class Class1AffinityPredictor(object):
write_manifest_df
.
to_csv
(
manifest_path
,
index
=
False
)
print
(
"
Wrote: %s
"
%
manifest_path
)
@staticmethod
def
model_name
(
allele
,
num
):
random_string
=
hashlib
.
sha1
(
str
(
time
.
time
()).
encode
()).
hexdigest
()[:
16
]
return
"
%s-%d-%s
"
%
(
allele
.
upper
(),
num
,
random_string
)
@staticmethod
def
weights_path
(
models_dir
,
model_name
):
return
join
(
models_dir
,
"
weights_%s.%s
"
%
(
model_name
,
Class1NeuralNetwork
.
weights_filename_extension
))
@staticmethod
def
load
(
models_dir
=
None
,
max_models
=
None
):
"""
Deserialize a predictor from a directory on disk.
Parameters
----------
models_dir : string
Path to directory
max_models : int, optional
Maximum number of Class1NeuralNetwork instances to load
Returns
-------
Class1AffinityPredictor
"""
if
models_dir
is
None
:
models_dir
=
get_path
(
"
models_class1
"
,
"
models
"
)
...
...
@@ -136,6 +190,44 @@ class Class1AffinityPredictor(object):
manifest_df
=
manifest_df
)
return
result
@staticmethod
def
model_name
(
allele
,
num
):
"""
Generate a model name
Parameters
----------
allele : string
num : int
Returns
-------
string
"""
random_string
=
hashlib
.
sha1
(
str
(
time
.
time
()).
encode
()).
hexdigest
()[:
16
]
return
"
%s-%d-%s
"
%
(
allele
.
upper
(),
num
,
random_string
)
@staticmethod
def
weights_path
(
models_dir
,
model_name
):
"""
Generate the path to the weights file for a model
Parameters
----------
models_dir : string
model_name : string
Returns
-------
string
"""
return
join
(
models_dir
,
"
weights_%s.%s
"
%
(
model_name
,
Class1NeuralNetwork
.
weights_filename_extension
))
def
fit_allele_specific_predictors
(
self
,
n_models
,
...
...
@@ -145,6 +237,38 @@ class Class1AffinityPredictor(object):
affinities
,
models_dir_for_save
=
None
,
verbose
=
1
):
"""
Fit one or more allele specific predictors for a single allele using a
single neural network architecture.
The new predictors are saved in the Class1AffinityPredictor instance
and will be used on subsequent calls to `predict`.
Parameters
----------
n_models : int
Number of neural networks to fit
architecture_hyperparameters : dict
allele : string
peptides : EncodableSequences or list of string
affinities : list of float
nM affinities
models_dir_for_save : string, optional
If specified, the Class1AffinityPredictor is (incrementally) written
to the given models dir after each neural network is fit.
verbose : int
Keras verbosity
Returns
-------
list of Class1NeuralNetwork
"""
allele
=
mhcnames
.
normalize_allele_name
(
allele
)
models
=
self
.
_fit_predictors
(
...
...
@@ -186,6 +310,39 @@ class Class1AffinityPredictor(object):
affinities
,
models_dir_for_save
=
None
,
verbose
=
1
):
"""
Fit one or more pan-allele predictors using a single neural network
architecture.
The new predictors are saved in the Class1AffinityPredictor instance
and will be used on subsequent calls to `predict`.
Parameters
----------
n_models : int
Number of neural networks to fit
architecture_hyperparameters : dict
alleles : list of string
Allele names (not pseudosequences) corresponding to each peptide
peptides : EncodableSequences or list of string
affinities : list of float
nM affinities
models_dir_for_save : string, optional
If specified, the Class1AffinityPredictor is (incrementally) written
to the given models dir after each neural network is fit.
verbose : int
Keras verbosity
Returns
-------
list of Class1NeuralNetwork
"""
alleles
=
pandas
.
Series
(
alleles
).
map
(
mhcnames
.
normalize_allele_name
)
allele_pseudosequences
=
alleles
.
map
(
self
.
allele_to_pseudosequence
)
...
...
@@ -223,7 +380,22 @@ class Class1AffinityPredictor(object):
affinities
,
allele_pseudosequences
,
verbose
=
1
):
"""
Private helper method
Parameters
----------
n_models : int
architecture_hyperparameters : dict
peptides : EncodableSequences or list of string
affinities : list of float
allele_pseudosequences : EncodableSequences or list of string
verbose : int
Returns
-------
generator of Class1NeuralNetwork
"""
encodable_peptides
=
EncodableSequences
.
create
(
peptides
)
for
i
in
range
(
n_models
):
print
(
"
Training model %d / %d
"
%
(
i
+
1
,
n_models
))
...
...
@@ -236,6 +408,27 @@ class Class1AffinityPredictor(object):
yield
model
def
predict
(
self
,
peptides
,
alleles
=
None
,
allele
=
None
):
"""
Predict nM binding affinities.
If multiple predictors are available for an allele, the predictions are
the geometric means of the individual model predictions.
One of
'
allele
'
or
'
alleles
'
must be specified. If
'
allele
'
is specified
all predictions will be for the given allele. If
'
alleles
'
is specified
it must be the same length as
'
peptides
'
and give the allele
corresponding to each peptide.
Parameters
----------
peptides : EncodableSequences or list of string
alleles : list of string
allele : string
Returns
-------
numpy.array of predictions
"""
df
=
self
.
predict_to_dataframe
(
peptides
=
peptides
,
alleles
=
alleles
,
...
...
@@ -249,6 +442,31 @@ class Class1AffinityPredictor(object):
alleles
=
None
,
allele
=
None
,
include_individual_model_predictions
=
False
):
"""
Predict nM binding affinities. Gives more detailed output than `predict`
method, including 5-95% prediction intervals.
If multiple predictors are available for an allele, the predictions are
the geometric means of the individual model predictions.
One of
'
allele
'
or
'
alleles
'
must be specified. If
'
allele
'
is specified
all predictions will be for the given allele. If
'
alleles
'
is specified
it must be the same length as
'
peptides
'
and give the allele
corresponding to each peptide.
Parameters
----------
peptides : EncodableSequences or list of string
alleles : list of string
allele : string
include_individual_model_predictions : boolean
If True, the predictions of each individual model are included as
columns in the result dataframe.
Returns
-------
pandas.DataFrame of predictions
"""
if
isinstance
(
peptides
,
string_types
):
raise
TypeError
(
"
peptides must be a list or array, not a string
"
)
if
isinstance
(
alleles
,
string_types
):
...
...
@@ -307,8 +525,26 @@ class Class1AffinityPredictor(object):
class
LazyLoadingClass1NeuralNetwork
(
object
):
"""
Thin wrapper over a Class1NeuralNetwork that supports deserializing it
lazily as needed.
"""
@classmethod
def
wrap
(
cls
,
instance
):
"""
Return a LazyLoadingClass1NeuralNetwork given a Class1NeuralNetwork.
If the given instance is a LazyLoadingClass1NeuralNetwork it is
returned unchanged.
Parameters
----------
instance : Class1NeuralNetwork or LazyLoadingClass1NeuralNetwork
Returns
-------
LazyLoadingClass1NeuralNetwork
"""
if
isinstance
(
instance
,
cls
):
return
instance
elif
isinstance
(
instance
,
Class1NeuralNetwork
):
...
...
@@ -317,12 +553,40 @@ class LazyLoadingClass1NeuralNetwork(object):
@classmethod
def
wrap_list
(
cls
,
lst
):
"""
Wrap each element of a list of Class1NeuralNetwork instances
Parameters
----------
lst : list of (Class1NeuralNetwork or LazyLoadingClass1NeuralNetwork)
Returns
-------
list of LazyLoadingClass1NeuralNetwork
"""
return
[
cls
.
wrap
(
instance
)
for
instance
in
lst
]
def
__init__
(
self
,
model
=
None
,
config
=
None
,
weights_filename
=
None
):
"""
Specify either
'
model
'
(to wrap an already loaded instance) or both
of
"
config
"
and
"
weights_filename
"
(to wrap a not yet loaded instance).
Parameters
----------
model : Class1NeuralNetwork, optional
If not specified you must specify both
'
config
'
and
'
weights_filename
'
config : dict, optional
As returned by `Class1NeuralNetwork.get_config`
weights_filename : string, optional
Path to weights
"""
if
model
is
None
:
assert
config
is
not
None
assert
weights_filename
is
not
None
...
...
@@ -336,6 +600,14 @@ class LazyLoadingClass1NeuralNetwork(object):
@property
def
instance
(
self
):
"""
Return the wrapped Class1NeuralNetwork instance, which will be loaded
the first time it is accessed and cached thereafter.
Returns
-------
Class1NeuralNetwork
"""
if
self
.
model
is
None
:
self
.
model
=
Class1NeuralNetwork
.
from_config
(
self
.
config
)
self
.
model
.
restore_weights
(
self
.
weights_filename
)
...
...
This diff is collapsed.
Click to expand it.
test/test_known_class1_epitopes.py
+
27
−
5
View file @
057e272a
...
...
@@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
shutil
import
pandas
import
mhcflurry.class1_affinity_prediction
from
mhcflurry.downloads
import
get_path
...
...
@@ -56,6 +59,12 @@ def test_A1_MAGE_epitope_downloaded_models():
predict_and_check
(
"
HLA-A*01:01
"
,
"
EVDPIGHLY
"
)
def
test_A2_HIV_epitope_downloaded_models
():
# Test the A2 HIV epitope SLYNTVATL from
# The HIV-1 HLA-A2-SLYNTVATL Is a Help-Independent CTL Epitope
predict_and_check
(
"
HLA-A*02:01
"
,
"
SLYNTVATL
"
)
def
test_A1_trained_models
():
allele
=
"
HLA-A*01:01
"
df
=
pandas
.
read_csv
(
...
...
@@ -113,9 +122,22 @@ def test_A1_trained_models():
predict_and_check
(
"
HLA-A*01:01
"
,
"
EVDPIGHLY
"
,
predictors
=
[
predictor
])
def
test_A2_HIV_epitope_downloaded_models
():
# Test the A2 HIV epitope SLYNTVATL from
# The HIV-1 HLA-A2-SLYNTVATL Is a Help-Independent CTL Epitope
predict_and_check
(
"
HLA-A*02:01
"
,
"
SLYNTVATL
"
)
models_dir
=
tempfile
.
mkdtemp
(
"
_models
"
)
print
(
models_dir
)
predictor
.
save
(
models_dir
)
predictor2
=
Class1AffinityPredictor
.
load
(
models_dir
)
predict_and_check
(
"
HLA-A*01:01
"
,
"
EVDPIGHLY
"
,
predictors
=
[
predictor2
])
shutil
.
rmtree
(
models_dir
)
predictor3
=
Class1AffinityPredictor
(
allele_to_allele_specific_models
=
{
allele
:
[
predictor
.
allele_to_allele_specific_models
[
allele
][
0
]]
})
predict_and_check
(
"
HLA-A*01:01
"
,
"
EVDPIGHLY
"
,
predictors
=
[
predictor3
])
models_dir
=
tempfile
.
mkdtemp
(
"
_models
"
)
print
(
models_dir
)
predictor3
.
save
(
models_dir
)
predictor4
=
Class1AffinityPredictor
.
load
(
models_dir
)
predict_and_check
(
"
HLA-A*01:01
"
,
"
EVDPIGHLY
"
,
predictors
=
[
predictor4
])
shutil
.
rmtree
(
models_dir
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment