Skip to content
Snippets Groups Projects
Commit eefe769e authored by Tim O'Donnell
Browse files

Minor fixes

 - Add default values and usage to argparse help messages
 - Require theano >=0.8.2 (was getting errors instantiating a model at 0.7.0)
parent 713707b3
No related branches found
No related tags found
No related merge requests found
......@@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data:
(make sure you have `wget` available, e.g. `brew install wget` on Mac OS X)
```
scripts/download-iedb.sh
scripts/download-peters-2013-dataset.sh
scripts/create-iedb-class1-dataset.py
scripts/create-combined-class1-dataset.py
script/download-iedb.sh
script/download-peters-2013-dataset.sh
script/create-iedb-class1-dataset.py
script/create-combined-class1-dataset.py
```
## Train Neural Network Models
......
......@@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser):
"--training-epochs",
type=int,
default=N_EPOCHS,
help="Number of training epochs")
help="Number of training epochs. Default: %(default)s")
parser.add_argument(
"--initialization",
default=INITIALIZATION_METHOD,
help="Initialization for neural network weights")
help="Initialization for neural network weights Default: %(default)s")
parser.add_argument(
"--activation",
default=ACTIVATION,
help="Activation function for neural network layers")
help="Activation function for neural network layers. "
"Default: %(default)s")
parser.add_argument(
"--embedding-size",
type=int,
default=EMBEDDING_DIM,
help="Size of vector representations for embedding amino acids")
help="Size of vector representations for embedding amino acids. "
"Default: %(default)s")
parser.add_argument(
"--hidden-layer-size",
type=int,
default=HIDDEN_LAYER_SIZE,
help="Size of hidden neural network layer")
help="Size of hidden neural network layer. Default: %(default)s")
parser.add_argument(
"--dropout",
type=float,
default=DROPOUT_PROBABILITY,
help="Dropout probability after neural network layers")
help="Dropout probability after neural network layers. "
"Default: %(default)s")
parser.add_argument(
"--max-ic50",
type=float,
default=MAX_IC50,
help="Largest IC50 represented by neural network output")
help="Largest IC50 represented by neural network output. "
"Default: %(default)s")
parser.add_argument(
"--learning-rate",
type=float,
default=0.001,
help="Learning rate for training neural network")
help="Learning rate for training neural network. Default: %(default)s")
return parser
numpy>= 1.7
pandas>=0.13.1
appdirs
theano
theano>=0.8.2
keras<1.0
h5py
cherrypy
......
......@@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
"--ic50-fraction-tolerance",
......@@ -49,40 +49,46 @@ parser.add_argument(
type=float,
help=(
"How much can the IEDB and NetMHCpan IC50 differ and still be"
" considered compatible (as a fraction of the NetMHCpan value)"))
" considered compatible (as a fraction of the NetMHCpan value). "
"Default: %(default)s"))
parser.add_argument(
"--min-assay-overlap-size",
type=int,
default=1,
help="Minimum number of entries overlapping between IEDB assay and NetMHCpan data")
help="Minimum number of entries overlapping between IEDB assay and "
"NetMHCpan data. Default: %(default)s")
parser.add_argument(
"--min-assay-fraction-same",
type=float,
help="Minimum fraction of peptides whose IC50 values agree with the NetMHCpan data",
help="Minimum fraction of peptides whose IC50 values agree with the "
"NetMHCpan data. Default: %(default)s",
default=0.9)
parser.add_argument(
"--iedb-pickle-path",
default=IEDB_PICKLE_PATH,
help="Path to .pickle file containing dictionary of IEDB assay datasets")
help="Path to .pickle file containing dictionary of IEDB assay datasets. "
"Default: %(default)s")
parser.add_argument(
"--netmhcpan-csv-path",
default=KIM_2013_CSV_FILENAME,
help="Path to CSV with NetMHCpan dataset from 2013 Peters paper")
help="Path to CSV with NetMHCpan dataset from 2013 Peters paper. "
"Default: %(default)s")
parser.add_argument(
"--output-dir",
default=CLASS1_DATA_DIRECTORY,
help="Path to directory where output CSV should be written")
help="Path to directory where output CSV should be written. "
"Default: %(default)s")
parser.add_argument(
"--output-csv-filename",
default=CLASS1_DATA_CSV_FILENAME,
help="Name of combined CSV file")
help="Name of combined CSV file. Default: %(default)s")
parser.add_argument(
"--extra-dataset-csv-path",
......
......@@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY
IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv"
PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
"--input-csv",
default=IEDB_SOURCE_FILENAME,
help="CSV file with IEDB's MHC binding data")
help="CSV file with IEDB's MHC binding data. Default: %(default)s")
parser.add_argument(
"--output-dir",
default=CLASS1_DATA_DIRECTORY,
help="Directory to write output pickle file")
help="Directory to write output pickle file. Default: %(default)s")
parser.add_argument(
"--output-pickle-filename",
default=PICKLE_OUTPUT_FILENAME,
help="Path to .pickle file containing dictionary of IEDB assay datasets")
help="Path to .pickle file containing dictionary of IEDB assay datasets. "
"Default: %(default)s")
parser.add_argument(
"--alleles",
metavar="ALLELE",
nargs="+",
default=[],
help="Restrict dataset to specified alleles")
......
......@@ -61,12 +61,14 @@ parser = argparse.ArgumentParser()
parser.add_argument(
"--binding-data-csv",
default=CSV_PATH,
help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns")
help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns. "
"Default: %(default)s")
parser.add_argument(
"--output-dir",
default=CLASS1_MODEL_DIRECTORY,
help="Output directory for allele-specific predictor HDF weights files")
help="Output directory for allele-specific predictor HDF weights files. "
"Default: %(default)s")
parser.add_argument(
"--overwrite",
......@@ -77,11 +79,15 @@ parser.add_argument(
parser.add_argument(
"--min-samples-per-allele",
default=5,
help="Don't train predictors for alleles with fewer samples than this",
metavar="N",
help="Don't train predictors for alleles with fewer than N samples. "
"Default: %(default)s",
type=int)
parser.add_argument(
"--alleles",
metavar="ALLELE",
help="Alleles to train",
default=[],
nargs="+",
type=normalize_allele_name)
......
......@@ -64,7 +64,7 @@ if __name__ == '__main__':
'numpy>=1.7',
'pandas>=0.13.1',
'appdirs',
'theano',
'theano>=0.8.2',
'keras',
'fancyimpute',
'scikit-learn',
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment