Skip to content
Snippets Groups Projects
Commit 00b17305 authored by Alex Rubinsteyn
Browse files

Merge pull request #18 from hammerlab/small-fixes

Minor fixes
parents 04591deb eefe769e
No related branches found
No related tags found
No related merge requests found
...@@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data: ...@@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data:
(make sure you have `wget` available, e.g. `brew install wget` on Mac OS X) (make sure you have `wget` available, e.g. `brew install wget` on Mac OS X)
``` ```
scripts/download-iedb.sh script/download-iedb.sh
scripts/download-peters-2013-dataset.sh script/download-peters-2013-dataset.sh
scripts/create-iedb-class1-dataset.py script/create-iedb-class1-dataset.py
scripts/create-combined-class1-dataset.py script/create-combined-class1-dataset.py
``` ```
## Train Neural Network Models ## Train Neural Network Models
......
...@@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser): ...@@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser):
"--training-epochs", "--training-epochs",
type=int, type=int,
default=N_EPOCHS, default=N_EPOCHS,
help="Number of training epochs") help="Number of training epochs. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--initialization", "--initialization",
default=INITIALIZATION_METHOD, default=INITIALIZATION_METHOD,
help="Initialization for neural network weights") help="Initialization for neural network weights Default: %(default)s")
parser.add_argument( parser.add_argument(
"--activation", "--activation",
default=ACTIVATION, default=ACTIVATION,
help="Activation function for neural network layers") help="Activation function for neural network layers. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--embedding-size", "--embedding-size",
type=int, type=int,
default=EMBEDDING_DIM, default=EMBEDDING_DIM,
help="Size of vector representations for embedding amino acids") help="Size of vector representations for embedding amino acids. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--hidden-layer-size", "--hidden-layer-size",
type=int, type=int,
default=HIDDEN_LAYER_SIZE, default=HIDDEN_LAYER_SIZE,
help="Size of hidden neural network layer") help="Size of hidden neural network layer. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--dropout", "--dropout",
type=float, type=float,
default=DROPOUT_PROBABILITY, default=DROPOUT_PROBABILITY,
help="Dropout probability after neural network layers") help="Dropout probability after neural network layers. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--max-ic50", "--max-ic50",
type=float, type=float,
default=MAX_IC50, default=MAX_IC50,
help="Largest IC50 represented by neural network output") help="Largest IC50 represented by neural network output. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--learning-rate", "--learning-rate",
type=float, type=float,
default=0.001, default=0.001,
help="Learning rate for training neural network") help="Learning rate for training neural network. Default: %(default)s")
return parser return parser
...@@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME) ...@@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt" KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument( parser.add_argument(
"--ic50-fraction-tolerance", "--ic50-fraction-tolerance",
...@@ -49,40 +49,46 @@ parser.add_argument( ...@@ -49,40 +49,46 @@ parser.add_argument(
type=float, type=float,
help=( help=(
"How much can the IEDB and NetMHCpan IC50 differ and still be" "How much can the IEDB and NetMHCpan IC50 differ and still be"
" considered compatible (as a fraction of the NetMHCpan value)")) " considered compatible (as a fraction of the NetMHCpan value). "
"Default: %(default)s"))
parser.add_argument( parser.add_argument(
"--min-assay-overlap-size", "--min-assay-overlap-size",
type=int, type=int,
default=1, default=1,
help="Minimum number of entries overlapping between IEDB assay and NetMHCpan data") help="Minimum number of entries overlapping between IEDB assay and "
"NetMHCpan data. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--min-assay-fraction-same", "--min-assay-fraction-same",
type=float, type=float,
help="Minimum fraction of peptides whose IC50 values agree with the NetMHCpan data", help="Minimum fraction of peptides whose IC50 values agree with the "
"NetMHCpan data. Default: %(default)s",
default=0.9) default=0.9)
parser.add_argument( parser.add_argument(
"--iedb-pickle-path", "--iedb-pickle-path",
default=IEDB_PICKLE_PATH, default=IEDB_PICKLE_PATH,
help="Path to .pickle file containing dictionary of IEDB assay datasets") help="Path to .pickle file containing dictionary of IEDB assay datasets. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--netmhcpan-csv-path", "--netmhcpan-csv-path",
default=KIM_2013_CSV_FILENAME, default=KIM_2013_CSV_FILENAME,
help="Path to CSV with NetMHCpan dataset from 2013 Peters paper") help="Path to CSV with NetMHCpan dataset from 2013 Peters paper. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--output-dir", "--output-dir",
default=CLASS1_DATA_DIRECTORY, default=CLASS1_DATA_DIRECTORY,
help="Path to directory where output CSV should be written") help="Path to directory where output CSV should be written. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--output-csv-filename", "--output-csv-filename",
default=CLASS1_DATA_CSV_FILENAME, default=CLASS1_DATA_CSV_FILENAME,
help="Name of combined CSV file") help="Name of combined CSV file. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--extra-dataset-csv-path", "--extra-dataset-csv-path",
......
...@@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY ...@@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY
IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv" IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv"
PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle" PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument( parser.add_argument(
"--input-csv", "--input-csv",
default=IEDB_SOURCE_FILENAME, default=IEDB_SOURCE_FILENAME,
help="CSV file with IEDB's MHC binding data") help="CSV file with IEDB's MHC binding data. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--output-dir", "--output-dir",
default=CLASS1_DATA_DIRECTORY, default=CLASS1_DATA_DIRECTORY,
help="Directory to write output pickle file") help="Directory to write output pickle file. Default: %(default)s")
parser.add_argument( parser.add_argument(
"--output-pickle-filename", "--output-pickle-filename",
default=PICKLE_OUTPUT_FILENAME, default=PICKLE_OUTPUT_FILENAME,
help="Path to .pickle file containing dictionary of IEDB assay datasets") help="Path to .pickle file containing dictionary of IEDB assay datasets. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--alleles", "--alleles",
metavar="ALLELE",
nargs="+", nargs="+",
default=[], default=[],
help="Restrict dataset to specified alleles") help="Restrict dataset to specified alleles")
......
...@@ -61,12 +61,14 @@ parser = argparse.ArgumentParser() ...@@ -61,12 +61,14 @@ parser = argparse.ArgumentParser()
parser.add_argument( parser.add_argument(
"--binding-data-csv", "--binding-data-csv",
default=CSV_PATH, default=CSV_PATH,
help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns") help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--output-dir", "--output-dir",
default=CLASS1_MODEL_DIRECTORY, default=CLASS1_MODEL_DIRECTORY,
help="Output directory for allele-specific predictor HDF weights files") help="Output directory for allele-specific predictor HDF weights files. "
"Default: %(default)s")
parser.add_argument( parser.add_argument(
"--overwrite", "--overwrite",
...@@ -77,11 +79,15 @@ parser.add_argument( ...@@ -77,11 +79,15 @@ parser.add_argument(
parser.add_argument( parser.add_argument(
"--min-samples-per-allele", "--min-samples-per-allele",
default=5, default=5,
help="Don't train predictors for alleles with fewer samples than this", metavar="N",
help="Don't train predictors for alleles with fewer than N samples. "
"Default: %(default)s",
type=int) type=int)
parser.add_argument( parser.add_argument(
"--alleles", "--alleles",
metavar="ALLELE",
help="Alleles to train",
default=[], default=[],
nargs="+", nargs="+",
type=normalize_allele_name) type=normalize_allele_name)
......
...@@ -64,7 +64,7 @@ if __name__ == '__main__': ...@@ -64,7 +64,7 @@ if __name__ == '__main__':
'numpy>=1.7', 'numpy>=1.7',
'pandas>=0.13.1', 'pandas>=0.13.1',
'appdirs', 'appdirs',
'theano', 'theano>=0.8.2',
'keras', 'keras',
'fancyimpute', 'fancyimpute',
'scikit-learn', 'scikit-learn',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment