From eefe769e8eb39252521cd96e19e970c32ae2dddf Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Thu, 21 Apr 2016 16:15:04 -0400 Subject: [PATCH] Minor fixes - Add default values and usage to argparse help messages - Require theano >=0.8.2 (was getting errors instantiating a model at 0.7.0) --- README.md | 8 +++---- .../class1_allele_specific_hyperparameters.py | 20 ++++++++++------- requirements.txt | 2 +- script/create-combined-class1-dataset.py | 22 ++++++++++++------- script/create-iedb-class1-dataset.py | 10 +++++---- ...rry-train-class1-allele-specific-models.py | 12 +++++++--- setup.py | 2 +- 7 files changed, 47 insertions(+), 29 deletions(-) diff --git a/README.md b/README.md index dbbeb1f1..b9dfadcc 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data: (make sure you have `wget` available, e.g. `brew install wget` on Mac OS X) ``` -scripts/download-iedb.sh -scripts/download-peters-2013-dataset.sh -scripts/create-iedb-class1-dataset.py -scripts/create-combined-class1-dataset.py +script/download-iedb.sh +script/download-peters-2013-dataset.sh +script/create-iedb-class1-dataset.py +script/create-combined-class1-dataset.py ``` ## Train Neural Network Models diff --git a/mhcflurry/class1_allele_specific_hyperparameters.py b/mhcflurry/class1_allele_specific_hyperparameters.py index e1e0c366..33e15cdb 100644 --- a/mhcflurry/class1_allele_specific_hyperparameters.py +++ b/mhcflurry/class1_allele_specific_hyperparameters.py @@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser): "--training-epochs", type=int, default=N_EPOCHS, - help="Number of training epochs") + help="Number of training epochs. 
Default: %(default)s") parser.add_argument( "--initialization", default=INITIALIZATION_METHOD, - help="Initialization for neural network weights") + help="Initialization for neural network weights. Default: %(default)s") parser.add_argument( "--activation", default=ACTIVATION, - help="Activation function for neural network layers") + help="Activation function for neural network layers. " + "Default: %(default)s") parser.add_argument( "--embedding-size", type=int, default=EMBEDDING_DIM, - help="Size of vector representations for embedding amino acids") + help="Size of vector representations for embedding amino acids. " + "Default: %(default)s") parser.add_argument( "--hidden-layer-size", type=int, default=HIDDEN_LAYER_SIZE, - help="Size of hidden neural network layer") + help="Size of hidden neural network layer. Default: %(default)s") parser.add_argument( "--dropout", type=float, default=DROPOUT_PROBABILITY, - help="Dropout probability after neural network layers") + help="Dropout probability after neural network layers. " + "Default: %(default)s") parser.add_argument( "--max-ic50", type=float, default=MAX_IC50, - help="Largest IC50 represented by neural network output") + help="Largest IC50 represented by neural network output. " + "Default: %(default)s") parser.add_argument( "--learning-rate", type=float, default=0.001, - help="Learning rate for training neural network") + help="Learning rate for training neural network. 
Default: %(default)s") return parser diff --git a/requirements.txt b/requirements.txt index d3011424..f1babbea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy>= 1.7 pandas>=0.13.1 appdirs -theano +theano>=0.8.2 keras<1.0 h5py cherrypy diff --git a/script/create-combined-class1-dataset.py b/script/create-combined-class1-dataset.py index c8880de4..fbfad25a 100755 --- a/script/create-combined-class1-dataset.py +++ b/script/create-combined-class1-dataset.py @@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME) KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt" -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( "--ic50-fraction-tolerance", @@ -49,40 +49,46 @@ parser.add_argument( type=float, help=( "How much can the IEDB and NetMHCpan IC50 differ and still be" - " considered compatible (as a fraction of the NetMHCpan value)")) + " considered compatible (as a fraction of the NetMHCpan value). " + "Default: %(default)s")) parser.add_argument( "--min-assay-overlap-size", type=int, default=1, - help="Minimum number of entries overlapping between IEDB assay and NetMHCpan data") + help="Minimum number of entries overlapping between IEDB assay and " + "NetMHCpan data. Default: %(default)s") parser.add_argument( "--min-assay-fraction-same", type=float, - help="Minimum fraction of peptides whose IC50 values agree with the NetMHCpan data", + help="Minimum fraction of peptides whose IC50 values agree with the " + "NetMHCpan data. Default: %(default)s", default=0.9) parser.add_argument( "--iedb-pickle-path", default=IEDB_PICKLE_PATH, - help="Path to .pickle file containing dictionary of IEDB assay datasets") + help="Path to .pickle file containing dictionary of IEDB assay datasets. 
" + "Default: %(default)s") parser.add_argument( "--netmhcpan-csv-path", default=KIM_2013_CSV_FILENAME, - help="Path to CSV with NetMHCpan dataset from 2013 Peters paper") + help="Path to CSV with NetMHCpan dataset from 2013 Peters paper. " + "Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_DATA_DIRECTORY, - help="Path to directory where output CSV should be written") + help="Path to directory where output CSV should be written. " + "Default: %(default)s") parser.add_argument( "--output-csv-filename", default=CLASS1_DATA_CSV_FILENAME, - help="Name of combined CSV file") + help="Name of combined CSV file. Default: %(default)s") parser.add_argument( "--extra-dataset-csv-path", diff --git a/script/create-iedb-class1-dataset.py b/script/create-iedb-class1-dataset.py index f6f147c1..d059909b 100755 --- a/script/create-iedb-class1-dataset.py +++ b/script/create-iedb-class1-dataset.py @@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv" PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle" -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( "--input-csv", default=IEDB_SOURCE_FILENAME, - help="CSV file with IEDB's MHC binding data") + help="CSV file with IEDB's MHC binding data. Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_DATA_DIRECTORY, - help="Directory to write output pickle file") + help="Directory to write output pickle file. Default: %(default)s") parser.add_argument( "--output-pickle-filename", default=PICKLE_OUTPUT_FILENAME, - help="Path to .pickle file containing dictionary of IEDB assay datasets") + help="Path to .pickle file containing dictionary of IEDB assay datasets. 
" + "Default: %(default)s") parser.add_argument( "--alleles", + metavar="ALLELE", nargs="+", default=[], help="Restrict dataset to specified alleles") diff --git a/script/mhcflurry-train-class1-allele-specific-models.py b/script/mhcflurry-train-class1-allele-specific-models.py index 3995808d..dab69768 100755 --- a/script/mhcflurry-train-class1-allele-specific-models.py +++ b/script/mhcflurry-train-class1-allele-specific-models.py @@ -61,12 +61,14 @@ parser = argparse.ArgumentParser() parser.add_argument( "--binding-data-csv", default=CSV_PATH, - help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns") + help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns. " + "Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_MODEL_DIRECTORY, - help="Output directory for allele-specific predictor HDF weights files") + help="Output directory for allele-specific predictor HDF weights files. " + "Default: %(default)s") parser.add_argument( "--overwrite", @@ -77,11 +79,15 @@ parser.add_argument( parser.add_argument( "--min-samples-per-allele", default=5, - help="Don't train predictors for alleles with fewer samples than this", + metavar="N", + help="Don't train predictors for alleles with fewer than N samples. " + "Default: %(default)s", type=int) parser.add_argument( "--alleles", + metavar="ALLELE", + help="Alleles to train", default=[], nargs="+", type=normalize_allele_name) diff --git a/setup.py b/setup.py index 382a5786..3078c409 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ if __name__ == '__main__': 'numpy>=1.7', 'pandas>=0.13.1', 'appdirs', - 'theano', + 'theano>=0.8.2', 'keras', 'fancyimpute', 'scikit-learn', -- GitLab