diff --git a/README.md b/README.md index dbbeb1f1305746d92e3fc3198621f5b75029e657..b9dfadcc124ddd4734826551dcf2c23364031802 100644 --- a/README.md +++ b/README.md @@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data: (make sure you have `wget` available, e.g. `brew install wget` on Mac OS X) ``` -scripts/download-iedb.sh -scripts/download-peters-2013-dataset.sh -scripts/create-iedb-class1-dataset.py -scripts/create-combined-class1-dataset.py +script/download-iedb.sh +script/download-peters-2013-dataset.sh +script/create-iedb-class1-dataset.py +script/create-combined-class1-dataset.py ``` ## Train Neural Network Models diff --git a/mhcflurry/class1_allele_specific_hyperparameters.py b/mhcflurry/class1_allele_specific_hyperparameters.py index e1e0c36650d3f3e8dba54a68725de0e3de81301d..33e15cdba88b2b7ea3dd5a52ef7a4925606682c0 100644 --- a/mhcflurry/class1_allele_specific_hyperparameters.py +++ b/mhcflurry/class1_allele_specific_hyperparameters.py @@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser): "--training-epochs", type=int, default=N_EPOCHS, - help="Number of training epochs") + help="Number of training epochs. Default: %(default)s") parser.add_argument( "--initialization", default=INITIALIZATION_METHOD, - help="Initialization for neural network weights") + help="Initialization for neural network weights. Default: %(default)s") parser.add_argument( "--activation", default=ACTIVATION, - help="Activation function for neural network layers") + help="Activation function for neural network layers. " + "Default: %(default)s") parser.add_argument( "--embedding-size", type=int, default=EMBEDDING_DIM, - help="Size of vector representations for embedding amino acids") + help="Size of vector representations for embedding amino acids. " + "Default: %(default)s") parser.add_argument( "--hidden-layer-size", type=int, default=HIDDEN_LAYER_SIZE, - help="Size of hidden neural network layer") + help="Size of hidden neural network layer. 
Default: %(default)s") parser.add_argument( "--dropout", type=float, default=DROPOUT_PROBABILITY, - help="Dropout probability after neural network layers") + help="Dropout probability after neural network layers. " + "Default: %(default)s") parser.add_argument( "--max-ic50", type=float, default=MAX_IC50, - help="Largest IC50 represented by neural network output") + help="Largest IC50 represented by neural network output. " + "Default: %(default)s") parser.add_argument( "--learning-rate", type=float, default=0.001, - help="Learning rate for training neural network") + help="Learning rate for training neural network. Default: %(default)s") return parser diff --git a/requirements.txt b/requirements.txt index d30114246bb4a1bb9b2bbf1dfb51e61c6d586486..f1babbeae2cfeb74c412b1c5741d4bce989a45fa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ numpy>= 1.7 pandas>=0.13.1 appdirs -theano +theano>=0.8.2 keras<1.0 h5py cherrypy diff --git a/script/create-combined-class1-dataset.py b/script/create-combined-class1-dataset.py index c8880de4de331c57f236624c46c8702ca7c84aa7..fbfad25aafc6e9a9811e7767b0dd527b93388fd5 100755 --- a/script/create-combined-class1-dataset.py +++ b/script/create-combined-class1-dataset.py @@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME) KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt" -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( "--ic50-fraction-tolerance", @@ -49,40 +49,46 @@ parser.add_argument( type=float, help=( "How much can the IEDB and NetMHCpan IC50 differ and still be" - " considered compatible (as a fraction of the NetMHCpan value)")) + " considered compatible (as a fraction of the NetMHCpan value). 
" + "Default: %(default)s")) parser.add_argument( "--min-assay-overlap-size", type=int, default=1, - help="Minimum number of entries overlapping between IEDB assay and NetMHCpan data") + help="Minimum number of entries overlapping between IEDB assay and " + "NetMHCpan data. Default: %(default)s") parser.add_argument( "--min-assay-fraction-same", type=float, - help="Minimum fraction of peptides whose IC50 values agree with the NetMHCpan data", + help="Minimum fraction of peptides whose IC50 values agree with the " + "NetMHCpan data. Default: %(default)s", default=0.9) parser.add_argument( "--iedb-pickle-path", default=IEDB_PICKLE_PATH, - help="Path to .pickle file containing dictionary of IEDB assay datasets") + help="Path to .pickle file containing dictionary of IEDB assay datasets. " + "Default: %(default)s") parser.add_argument( "--netmhcpan-csv-path", default=KIM_2013_CSV_FILENAME, - help="Path to CSV with NetMHCpan dataset from 2013 Peters paper") + help="Path to CSV with NetMHCpan dataset from 2013 Peters paper. " + "Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_DATA_DIRECTORY, - help="Path to directory where output CSV should be written") + help="Path to directory where output CSV should be written. " + "Default: %(default)s") parser.add_argument( "--output-csv-filename", default=CLASS1_DATA_CSV_FILENAME, - help="Name of combined CSV file") + help="Name of combined CSV file. 
Default: %(default)s") parser.add_argument( "--extra-dataset-csv-path", diff --git a/script/create-iedb-class1-dataset.py b/script/create-iedb-class1-dataset.py index f6f147c10be20a07820d1ca92c4b7d7ffd0315cf..d059909bf072e5f8e3acd80ff69d4c30f07cee2b 100755 --- a/script/create-iedb-class1-dataset.py +++ b/script/create-iedb-class1-dataset.py @@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv" PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle" -parser = argparse.ArgumentParser() +parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( "--input-csv", default=IEDB_SOURCE_FILENAME, - help="CSV file with IEDB's MHC binding data") + help="CSV file with IEDB's MHC binding data. Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_DATA_DIRECTORY, - help="Directory to write output pickle file") + help="Directory to write output pickle file. Default: %(default)s") parser.add_argument( "--output-pickle-filename", default=PICKLE_OUTPUT_FILENAME, - help="Path to .pickle file containing dictionary of IEDB assay datasets") + help="Path to .pickle file containing dictionary of IEDB assay datasets. " + "Default: %(default)s") parser.add_argument( "--alleles", + metavar="ALLELE", nargs="+", default=[], help="Restrict dataset to specified alleles") diff --git a/script/mhcflurry-train-class1-allele-specific-models.py b/script/mhcflurry-train-class1-allele-specific-models.py index 3995808d6499a47a8d2d0e1abc875f149c04b506..dab6976812c4b17e7741fa590dc3a183aaffd7a5 100755 --- a/script/mhcflurry-train-class1-allele-specific-models.py +++ b/script/mhcflurry-train-class1-allele-specific-models.py @@ -61,12 +61,14 @@ parser = argparse.ArgumentParser() parser.add_argument( "--binding-data-csv", default=CSV_PATH, - help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns") + help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns. 
" + "Default: %(default)s") parser.add_argument( "--output-dir", default=CLASS1_MODEL_DIRECTORY, - help="Output directory for allele-specific predictor HDF weights files") + help="Output directory for allele-specific predictor HDF weights files. " + "Default: %(default)s") parser.add_argument( "--overwrite", @@ -77,11 +79,15 @@ parser.add_argument( parser.add_argument( "--min-samples-per-allele", default=5, - help="Don't train predictors for alleles with fewer samples than this", + metavar="N", + help="Don't train predictors for alleles with fewer than N samples. " + "Default: %(default)s", type=int) parser.add_argument( "--alleles", + metavar="ALLELE", + help="Alleles to train", default=[], nargs="+", type=normalize_allele_name) diff --git a/setup.py b/setup.py index 382a5786e33c789363c9c5e4b7dd9d6101096186..3078c409d8f7048aa5abada27bd14469ed5ffa89 100644 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ if __name__ == '__main__': 'numpy>=1.7', 'pandas>=0.13.1', 'appdirs', - 'theano', + 'theano>=0.8.2', 'keras', 'fancyimpute', 'scikit-learn',