Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mhc_rank
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Patrick Skillman-Lawrence
mhc_rank
Commits
8f797a74
Commit
8f797a74
authored
5 years ago
by
Tim O'Donnell
Browse files
Options
Downloads
Patches
Plain Diff
fixes
parent
d1ef4aae
Loading
Loading
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
downloads-generation/models_class1_pan_unselected/GENERATE.sh
+5
-3
5 additions, 3 deletions
...loads-generation/models_class1_pan_unselected/GENERATE.sh
test/test_class1_pan.py
+32
-4
32 additions, 4 deletions
test/test_class1_pan.py
with
37 additions
and
7 deletions
downloads-generation/models_class1_pan_unselected/GENERATE.sh
+
5
−
3
View file @
8f797a74
...
...
@@ -42,13 +42,15 @@ time mhcflurry-class1-train-pan-allele-models \
--held-out-measurements-per-allele-fraction-and-max
0.25 100
\
--ensemble-size
4
\
--hyperparameters
hyperparameters.yaml
\
--out-models-dir
models
\
--out-models-dir
models
-unselected
\
#--num-jobs $(expr $PROCESSORS \* 2) --gpus $GPUS --max-workers-per-gpu 2 --max-tasks-per-worker 50
cp
$SCRIPT_ABSOLUTE_PATH
.
bzip2 LOG.txt
tar
-cjf
"../
${
DOWNLOAD_NAME
}
.tar.bz2"
*
tar
-cjf
"../
${
DOWNLOAD_NAME
}
.with_unselected.tar.bz2"
*
echo
"Created archive:
$SCRATCH_DIR
/
${
DOWNLOAD_NAME
}
.with_unselected.tar.bz2"
echo
"Created archive:
$SCRATCH_DIR
/
$DOWNLOAD_NAME
.tar.bz2"
ls
*
|
grep
-v
models-unselected | xargs
-I
{}
tar
-cjf
"../
${
DOWNLOAD_NAME
}
.tar.bz2"
{}
echo
"Created archive:
$SCRATCH_DIR
/
${
DOWNLOAD_NAME
}
.tar.bz2"
\ No newline at end of file
This diff is collapsed.
Click to expand it.
test/test_class1_pan.py
+
32
−
4
View file @
8f797a74
...
...
@@ -9,9 +9,10 @@ import tempfile
import
subprocess
from
copy
import
deepcopy
from
sklearn.metrics
import
roc_auc_score
import
pandas
from
numpy.testing
import
assert_
array_less
,
assert_equal
from
numpy.testing
import
assert_
,
assert_equal
from
mhcflurry
import
Class1AffinityPredictor
,
Class1NeuralNetwork
from
mhcflurry.allele_encoding
import
AlleleEncoding
...
...
@@ -35,7 +36,7 @@ HYPERPARAMETERS = {
'
minibatch_size
'
:
128
,
'
optimizer
'
:
'
rmsprop
'
,
'
output_activation
'
:
'
sigmoid
'
,
'
patience
'
:
2
0
,
'
patience
'
:
1
0
,
'
peptide_allele_merge_activation
'
:
''
,
'
peptide_allele_merge_method
'
:
'
concatenate
'
,
'
peptide_amino_acid_encoding
'
:
'
BLOSUM62
'
,
...
...
@@ -71,6 +72,17 @@ TRAIN_DF = TRAIN_DF.loc[TRAIN_DF.peptide.str.len() >= 8]
TRAIN_DF
=
TRAIN_DF
.
loc
[
TRAIN_DF
.
peptide
.
str
.
len
()
<=
15
]
MS_HITS_DF
=
pandas
.
read_csv
(
get_path
(
"
data_curated
"
,
"
curated_training_data.with_mass_spec.csv.bz2
"
))
MS_HITS_DF
=
MS_HITS_DF
.
loc
[
MS_HITS_DF
.
allele
.
isin
(
ALLELE_TO_SEQUENCE
)]
MS_HITS_DF
=
MS_HITS_DF
.
loc
[
MS_HITS_DF
.
peptide
.
str
.
len
()
>=
8
]
MS_HITS_DF
=
MS_HITS_DF
.
loc
[
MS_HITS_DF
.
peptide
.
str
.
len
()
<=
15
]
MS_HITS_DF
=
MS_HITS_DF
.
loc
[
~
MS_HITS_DF
.
peptide
.
isin
(
TRAIN_DF
.
peptide
)]
print
(
"
Loaded %d training and %d ms hits
"
%
(
len
(
TRAIN_DF
),
len
(
MS_HITS_DF
)))
def
test_train_simple
():
network
=
Class1NeuralNetwork
(
**
HYPERPARAMETERS
)
allele_encoding
=
AlleleEncoding
(
...
...
@@ -82,8 +94,24 @@ def test_train_simple():
allele_encoding
=
allele_encoding
,
inequalities
=
TRAIN_DF
.
measurement_inequality
.
values
)
validation_df
=
MS_HITS_DF
.
copy
()
validation_df
[
"
hit
"
]
=
1
decoys_df
=
MS_HITS_DF
.
copy
()
decoys_df
[
"
hit
"
]
=
0
decoys_df
[
"
allele
"
]
=
decoys_df
.
allele
.
sample
(
frac
=
1.0
).
values
validation_df
=
pandas
.
concat
([
validation_df
,
decoys_df
],
ignore_index
=
True
)
predictions
=
network
.
predict
(
peptides
=
TRAIN_DF
.
peptide
.
values
,
allele_encoding
=
allele_encoding
)
peptides
=
validation_df
.
peptide
.
values
,
allele_encoding
=
AlleleEncoding
(
validation_df
.
allele
.
values
,
borrow_from
=
allele_encoding
))
print
(
pandas
.
Series
(
predictions
).
describe
())
score
=
roc_auc_score
(
validation_df
.
hit
,
-
1
*
predictions
)
print
(
"
AUC
"
,
score
)
assert_
(
score
>
0.6
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment