Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mhc_rank
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Patrick Skillman-Lawrence
mhc_rank
Commits
de6d1864
Commit
de6d1864
authored
8 years ago
by
Alex Rubinsteyn
Browse files
Options
Downloads
Patches
Plain Diff
use imputed X, Y, weights in training
parent
a11524f2
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
mhcflurry/imputation.py
+6
-10
6 additions, 10 deletions
mhcflurry/imputation.py
script/mhcflurry-train-class1-allele-specific-models.py
+34
-11
34 additions, 11 deletions
script/mhcflurry-train-class1-allele-specific-models.py
with
40 additions
and
21 deletions
mhcflurry/imputation.py
+
6
−
10
View file @
de6d1864
...
...
@@ -21,16 +21,12 @@ from collections import defaultdict
import
logging
import
numpy
as
np
from
fancyimpute.dictionary_helpers
import
(
dense_matrix_from_nested_dictionary
)
from
fancyimpute
import
(
KNN
,
IterativeSVD
,
SimpleFill
,
SoftImpute
,
MICE
)
from
fancyimpute.knn
import
KNN
from
fancyimpute.iterative_svd
import
IterativeSVD
from
fancyimpute.simple_fill
import
SimpleFill
from
fancyimpute.soft_impute
import
SoftImpute
from
fancyimpute.mice
import
MICE
from
fancyimpute.dictionary_helpers
import
dense_matrix_from_nested_dictionary
from
.data
import
(
create_allele_data_from_peptide_to_ic50_dict
,
...
...
This diff is collapsed.
Click to expand it.
script/mhcflurry-train-class1-allele-specific-models.py
+
34
−
11
View file @
de6d1864
...
...
@@ -53,7 +53,7 @@ from mhcflurry.paths import (
CLASS1_MODEL_DIRECTORY
,
CLASS1_DATA_DIRECTORY
)
from
mhcflurry.imputation
import
imputer_from_name
,
create_imputed_datasets
from
mhcflurry.imputation
import
create_imputed_datasets
,
imputer_from_name
CSV_FILENAME
=
"
combined_human_class1_dataset.csv
"
CSV_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
CSV_FILENAME
)
...
...
@@ -98,7 +98,7 @@ parser.add_argument(
"
--imputation-method
"
,
default
=
None
,
choices
=
(
"
mice
"
,
"
knn
"
,
"
softimpute
"
,
"
svd
"
,
"
mean
"
),
type
=
imputer_from_name
,
type
=
lambda
s
:
s
.
strip
().
lower
()
,
help
=
"
Use the given imputation method to generate data for pre-training models
"
)
# add options for neural network hyperparameters
...
...
@@ -125,8 +125,14 @@ if __name__ == "__main__":
Y_all
=
np
.
concatenate
([
group
.
Y
for
group
in
allele_data_dict
.
values
()])
print
(
"
Total Dataset size = %d
"
%
len
(
Y_all
))
if
args
.
imputation_method
is
not
None
:
# TODO: use imputed data for training
if
args
.
imputation_method
is
None
:
imputer
=
None
else
:
imputer
=
imputer_from_name
(
args
.
imputation_method
)
if
imputer
is
None
:
imputed_data_dict
=
{}
else
:
imputed_data_dict
=
create_imputed_datasets
(
allele_data_dict
,
args
.
imputation_method
)
...
...
@@ -138,17 +144,30 @@ if __name__ == "__main__":
alleles
=
sorted
(
allele_data_dict
.
keys
())
for
allele_name
in
alleles
:
allele_name
=
normalize_allele_name
(
allele_name
)
if
allele_name
.
isdigit
():
print
(
"
Skipping allele %s
"
%
(
allele_name
,))
continue
allele_data
=
allele_data_dict
[
allele_name
]
X
=
allele_data
.
X_index
Y
=
allele_data
.
Y
weights
=
allele_data
.
weights
n_allele
=
len
(
allele_data
.
Y
)
assert
len
(
X
)
==
n_allele
assert
len
(
weights
)
==
n_allele
if
allele_name
in
imputed_data_dict
:
imputed_data
=
imputed_data_dict
[
allele_name
]
X_pretrain
=
imputed_data
.
X_index
Y_pretrain
=
imputed_data
.
Y
weights_pretrain
=
imputed_data
.
weights
else
:
X_pretrain
=
None
Y_pretrain
=
None
weights_pretrain
=
None
# normalize allele name to check if it's just
allele_name
=
normalize_allele_name
(
allele_name
)
if
allele_name
.
isdigit
():
print
(
"
Skipping allele %s
"
%
(
allele_name
,))
continue
print
(
"
\n
=== Training predictor for %s: %d samples, %d unique
"
%
(
allele_name
,
...
...
@@ -189,8 +208,12 @@ if __name__ == "__main__":
remove
(
hdf_path
)
model
.
fit
(
allele_data
.
X_index
,
allele_data
.
Y
,
X
=
allele_data
.
X_index
,
Y
=
allele_data
.
Y
,
sample_weights
=
weights
,
X_pretrain
=
X_pretrain
,
Y_pretrain
=
Y_pretrain
,
sample_weights_pretrain
=
weights_pretrain
,
n_training_epochs
=
args
.
training_epochs
,
verbose
=
True
)
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment