Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mhc_rank
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Patrick Skillman-Lawrence
mhc_rank
Commits
f6585612
Commit
f6585612
authored
7 years ago
by
Tim O'Donnell
Browse files
Options
Downloads
Patches
Plain Diff
More optimizations to Class1AffinityPredictor.predict_to_dataframe
parent
764d8594
Loading
Loading
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
mhcflurry/class1_affinity_predictor.py
+73
-39
73 additions, 39 deletions
mhcflurry/class1_affinity_predictor.py
test/test_speed.py
+6
-0
6 additions, 0 deletions
test/test_speed.py
with
79 additions
and
39 deletions
mhcflurry/class1_affinity_predictor.py
+
73
−
39
View file @
f6585612
...
...
@@ -107,6 +107,21 @@ class Class1AffinityPredictor(object):
if
not
allele_to_percent_rank_transform
:
allele_to_percent_rank_transform
=
{}
self
.
allele_to_percent_rank_transform
=
allele_to_percent_rank_transform
self
.
_cache
=
{}
def
clear_cache
(
self
):
"""
Clear values cached based on the neural networks in this predictor.
Users should call this after mutating any of the following:
- class1_pan_allele_models
- allele_to_allele_specific_models
- allele_to_fixed_length_sequence
Methods that mutate these instance variables will call this method on
their own if needed.
"""
self
.
_cache
.
clear
()
@property
def
neural_networks
(
self
):
...
...
@@ -209,6 +224,7 @@ class Class1AffinityPredictor(object):
current_models
.
append
(
model
)
new_model_names
.
append
(
model_name
)
self
.
clear_cache
()
return
new_model_names
@property
...
...
@@ -220,10 +236,12 @@ class Class1AffinityPredictor(object):
-------
list of string
"""
result
=
set
(
self
.
allele_to_allele_specific_models
)
if
self
.
allele_to_fixed_length_sequence
:
result
=
result
.
union
(
self
.
allele_to_fixed_length_sequence
)
return
sorted
(
result
)
if
'
supported_alleles
'
not
in
self
.
_cache
:
result
=
set
(
self
.
allele_to_allele_specific_models
)
if
self
.
allele_to_fixed_length_sequence
:
result
=
result
.
union
(
self
.
allele_to_fixed_length_sequence
)
self
.
_cache
[
"
supported_alleles
"
]
=
sorted
(
result
)
return
self
.
_cache
[
"
supported_alleles
"
]
@property
def
supported_peptide_lengths
(
self
):
...
...
@@ -236,13 +254,15 @@ class Class1AffinityPredictor(object):
(int, int) tuple
"""
models
=
list
(
self
.
class1_pan_allele_models
)
for
allele_models
in
self
.
allele_to_allele_specific_models
.
values
():
models
.
extend
(
allele_models
)
length_ranges
=
[
model
.
supported_peptide_lengths
for
model
in
models
]
return
(
max
(
lower
for
(
lower
,
upper
)
in
length_ranges
),
min
(
upper
for
(
lower
,
upper
)
in
length_ranges
))
if
'
supported_peptide_lengths
'
not
in
self
.
_cache
:
length_ranges
=
set
(
network
.
supported_peptide_lengths
for
network
in
self
.
neural_networks
)
result
=
(
max
(
lower
for
(
lower
,
upper
)
in
length_ranges
),
min
(
upper
for
(
lower
,
upper
)
in
length_ranges
))
self
.
_cache
[
"
supported_peptide_lengths
"
]
=
result
return
self
.
_cache
[
"
supported_peptide_lengths
"
]
def
save
(
self
,
models_dir
,
model_names_to_write
=
None
):
"""
...
...
@@ -582,6 +602,7 @@ class Class1AffinityPredictor(object):
models_dir_for_save
,
model_names_to_write
=
[
model_name
])
models
.
append
(
model
)
self
.
clear_cache
()
return
models
def
fit_class1_pan_allele_models
(
...
...
@@ -673,6 +694,7 @@ class Class1AffinityPredictor(object):
models_dir_for_save
,
model_names_to_write
=
[
model_name
])
models
.
append
(
model
)
self
.
clear_cache
()
return
models
def
percentile_ranks
(
self
,
affinities
,
allele
=
None
,
alleles
=
None
,
throw
=
True
):
...
...
@@ -821,7 +843,9 @@ class Class1AffinityPredictor(object):
raise
ValueError
(
"
Must specify
'
allele
'
or
'
alleles
'
.
"
)
peptides
=
EncodableSequences
.
create
(
peptides
)
df
=
peptides
.
sequences_df
.
rename
(
columns
=
{
'
sequence
'
:
'
peptide
'
})
df
=
pandas
.
DataFrame
({
'
peptide
'
:
peptides
.
sequences
},
copy
=
False
)
if
allele
is
not
None
:
if
alleles
is
not
None
:
...
...
@@ -856,9 +880,10 @@ class Class1AffinityPredictor(object):
peptides
.
max_length
>
max_peptide_length
):
# Only compute this if needed
all_peptide_lengths_supported
=
False
sequence_length
=
df
.
peptide
.
str
.
len
()
df
[
"
supported_peptide_length
"
]
=
(
(
df
.
sequence_length
>=
min_peptide_length
)
&
(
df
.
sequence_length
<=
max_peptide_length
))
(
sequence_length
>=
min_peptide_length
)
&
(
sequence_length
<=
max_peptide_length
))
if
(
~
df
.
supported_peptide_length
).
any
():
msg
=
(
"
%d peptides have lengths outside of supported range [%d, %d]:
"
...
...
@@ -875,6 +900,16 @@ class Class1AffinityPredictor(object):
df
[
"
supported_peptide_length
"
]
=
True
all_peptide_lengths_supported
=
True
num_pan_models
=
len
(
self
.
class1_pan_allele_models
)
max_single_allele_models
=
max
(
len
(
self
.
allele_to_allele_specific_models
.
get
(
allele
,
[]))
for
allele
in
unique_alleles
)
predictions_array
=
numpy
.
zeros
(
shape
=
(
df
.
shape
[
0
],
num_pan_models
+
max_single_allele_models
),
dtype
=
"
float64
"
)
predictions_array
[:]
=
numpy
.
nan
if
self
.
class1_pan_allele_models
:
unsupported_alleles
=
[
allele
for
allele
in
...
...
@@ -897,7 +932,7 @@ class Class1AffinityPredictor(object):
allele_to_fixed_length_sequence
=
self
.
allele_to_fixed_length_sequence
)
masked_peptides
=
peptides
.
sequences
[
mask
]
for
(
i
,
model
)
in
enumerate
(
self
.
class1_pan_allele_models
):
df
.
loc
[
mask
,
"
model_pan_%d
"
%
i
]
=
model
.
predict
(
predictions_array
[
mask
,
i
]
=
model
.
predict
(
masked_peptides
,
allele_encoding
=
masked_allele_encoding
)
...
...
@@ -927,41 +962,41 @@ class Class1AffinityPredictor(object):
if
mask
is
None
or
mask
.
all
():
# Common case optimization
for
(
i
,
model
)
in
enumerate
(
models
):
df
[
"
model_single_%d
"
%
i
]
=
model
.
predict
(
peptides
)
predictions_array
[:,
num_pan_models
+
i
]
=
(
model
.
predict
(
peptides
))
elif
mask
.
sum
()
>
0
:
allele_
peptides
=
EncodableSequences
.
create
(
peptides
_for_allele
=
EncodableSequences
.
create
(
df
.
ix
[
mask
].
peptide
.
values
)
for
(
i
,
model
)
in
enumerate
(
models
):
df
.
loc
[
mask
,
"
model_single_%d
"
%
i
]
=
model
.
predict
(
allele_peptides
)
# Geometric mean
df_predictions
=
df
[
[
c
for
c
in
df
.
columns
if
c
.
startswith
(
"
model_
"
)]
]
predictions_array
[
mask
,
num_pan_models
+
i
,
]
=
model
.
predict
(
peptides_for_allele
)
if
callable
(
centrality_measure
):
centrality_function
=
centrality_measure
else
:
centrality_function
=
CENTRALITY_MEASURES
[
centrality_measure
]
logs
=
numpy
.
log
(
df_
predictions
)
log_centers
=
centrality_function
(
logs
.
values
)
logs
=
numpy
.
log
(
predictions
_array
)
log_centers
=
centrality_function
(
logs
)
df
[
"
prediction
"
]
=
numpy
.
exp
(
log_centers
)
if
include_confidence_intervals
:
df
[
"
prediction_low
"
]
=
numpy
.
exp
(
logs
.
quantile
(
0.05
,
axis
=
1
))
df
[
"
prediction_high
"
]
=
numpy
.
exp
(
logs
.
quantile
(
0.95
,
axis
=
1
))
df
[
"
prediction_low
"
]
=
numpy
.
exp
(
numpy
.
percentile
(
logs
,
5.0
,
axis
=
1
))
df
[
"
prediction_high
"
]
=
numpy
.
exp
(
numpy
.
percentile
(
logs
,
95.0
,
axis
=
1
))
del
df
[
"
normalized_allele
"
]
del
df
[
"
supported_peptide_length
"
]
if
include_individual_model_predictions
:
columns
=
sorted
(
df
.
columns
,
key
=
lambda
c
:
c
.
startswith
(
'
model_
'
))
else
:
columns
=
[
c
for
c
in
df
.
columns
if
c
not
in
df_predictions
.
columns
]
columns
.
remove
(
"
normalized_allele
"
)
columns
.
remove
(
"
supported_peptide_length
"
)
columns
.
remove
(
"
sequence_length
"
)
for
i
in
range
(
num_pan_models
):
df
[
"
model_pan_%d
"
%
i
]
=
predictions_array
[:,
i
]
for
i
in
range
(
max_single_allele_models
):
df
[
"
model_single_%d
"
%
i
]
=
predictions_array
[
:,
num_pan_models
+
i
]
if
include_percentile_ranks
:
if
self
.
allele_to_percent_rank_transform
:
...
...
@@ -969,10 +1004,9 @@ class Class1AffinityPredictor(object):
df
.
prediction
,
alleles
=
df
.
normalized_allele
.
values
,
throw
=
throw
)
columns
.
append
(
"
prediction_percentile
"
)
else
:
warnings
.
warn
(
"
No percentile rank information available.
"
)
return
df
[
columns
].
copy
()
return
df
@staticmethod
def
save_weights
(
weights_list
,
filename
):
...
...
This diff is collapsed.
Click to expand it.
test/test_speed.py
+
6
−
0
View file @
f6585612
...
...
@@ -51,6 +51,12 @@ def test_speed(profile=False):
DOWNLOADED_PREDICTOR
.
predict
(
peptides
,
allele
=
"
HLA-A*02:01
"
)
end
(
"
pred_already_encoded_%d
"
%
NUM2
)
NUM_REPEATS
=
100
start
(
"
pred_already_encoded_%d_%d_times
"
%
(
NUM2
,
NUM_REPEATS
))
for
_
in
range
(
NUM_REPEATS
):
DOWNLOADED_PREDICTOR
.
predict
(
peptides
,
allele
=
"
HLA-A*02:01
"
)
end
(
"
pred_already_encoded_%d_%d_times
"
%
(
NUM2
,
NUM_REPEATS
))
print
(
"
SPEED BENCHMARK
"
)
print
(
"
Results:
\n
%s
"
%
str
(
pandas
.
Series
(
timings
)))
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment