Merge pull request #18 from hammerlab/small-fixes

Minor fixes

Merge pull request #18 from hammerlab/small-fixes
Minor fixes
00b17305 · Alex Rubinsteyn · 04591deb · eefe769e · 00b17305 · 00b17305
Commit 00b17305 authored 8 years ago by Alex Rubinsteyn
--- a/README.md
+++ b/README.md
@@ -19,10 +19,10 @@ Download, Normalize, and Combine Training Data:
 (make sure you have `wget` available, e.g. `brew install wget` on Mac OS X)
 ```
-scripts/download-iedb.sh
+script/download-iedb.sh
-scripts/download-peters-2013-dataset.sh
+script/download-peters-2013-dataset.sh
-scripts/create-iedb-class1-dataset.py
+script/create-iedb-class1-dataset.py
-scripts/create-combined-class1-dataset.py
+script/create-combined-class1-dataset.py
 ```
 ## Train Neural Network Models

--- a/mhcflurry/class1_allele_specific_hyperparameters.py
+++ b/mhcflurry/class1_allele_specific_hyperparameters.py
@@ -36,46 +36,50 @@ def add_hyperparameter_arguments_to_parser(parser):
        "--training-epochs",
        type=int,
        default=N_EPOCHS,
-        help="Number of training epochs")
+        help="Number of training epochs. Default: %(default)s")
    parser.add_argument(
        "--initialization",
        default=INITIALIZATION_METHOD,
-        help="Initialization for neural network weights")
+        help="Initialization for neural network weights. Default: %(default)s")
    parser.add_argument(
        "--activation",
        default=ACTIVATION,
-        help="Activation function for neural network layers")
+        help="Activation function for neural network layers. "
+        "Default: %(default)s")
    parser.add_argument(
        "--embedding-size",
        type=int,
        default=EMBEDDING_DIM,
-        help="Size of vector representations for embedding amino acids")
+        help="Size of vector representations for embedding amino acids. "
+        "Default: %(default)s")
    parser.add_argument(
        "--hidden-layer-size",
        type=int,
        default=HIDDEN_LAYER_SIZE,
-        help="Size of hidden neural network layer")
+        help="Size of hidden neural network layer. Default: %(default)s")
    parser.add_argument(
        "--dropout",
        type=float,
        default=DROPOUT_PROBABILITY,
-        help="Dropout probability after neural network layers")
+        help="Dropout probability after neural network layers. "
+        "Default: %(default)s")
    parser.add_argument(
        "--max-ic50",
        type=float,
        default=MAX_IC50,
-        help="Largest IC50 represented by neural network output")
+        help="Largest IC50 represented by neural network output. "
+        "Default: %(default)s")
    parser.add_argument(
        "--learning-rate",
        type=float,
        default=0.001,
-        help="Learning rate for training neural network")
+        help="Learning rate for training neural network. Default: %(default)s")
    return parser
--- a/requirements.txt
+++ b/requirements.txt
 numpy>= 1.7
 pandas>=0.13.1
 appdirs
-theano
+theano>=0.8.2
 keras<1.0
 h5py
 cherrypy

--- a/script/create-combined-class1-dataset.py
+++ b/script/create-combined-class1-dataset.py
@@ -41,7 +41,7 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
 KIM_2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
-parser = argparse.ArgumentParser()
+parser = argparse.ArgumentParser(usage=__doc__)
 parser.add_argument(
    "--ic50-fraction-tolerance",
@@ -49,40 +49,46 @@ parser.add_argument(
    type=float,
    help=(
        "How much can the IEDB and NetMHCpan IC50 differ and still be"
-        " considered compatible (as a fraction of the NetMHCpan value)"))
+        " considered compatible (as a fraction of the NetMHCpan value). "
+        "Default: %(default)s"))
 parser.add_argument(
    "--min-assay-overlap-size",
    type=int,
    default=1,
-    help="Minimum number of entries overlapping between IEDB assay and NetMHCpan data")
+    help="Minimum number of entries overlapping between IEDB assay and "
+    "NetMHCpan data. Default: %(default)s")
 parser.add_argument(
    "--min-assay-fraction-same",
    type=float,
-    help="Minimum fraction of peptides whose IC50 values agree with the NetMHCpan data",
+    help="Minimum fraction of peptides whose IC50 values agree with the "
+    "NetMHCpan data. Default: %(default)s",
    default=0.9)
 parser.add_argument(
    "--iedb-pickle-path",
    default=IEDB_PICKLE_PATH,
-    help="Path to .pickle file containing dictionary of IEDB assay datasets")
+    help="Path to .pickle file containing dictionary of IEDB assay datasets. "
+    "Default: %(default)s")
 parser.add_argument(
    "--netmhcpan-csv-path",
    default=KIM_2013_CSV_FILENAME,
-    help="Path to CSV with NetMHCpan dataset from 2013 Peters paper")
+    help="Path to CSV with NetMHCpan dataset from 2013 Peters paper. "
+    "Default: %(default)s")
 parser.add_argument(
    "--output-dir",
    default=CLASS1_DATA_DIRECTORY,
-    help="Path to directory where output CSV should be written")
+    help="Path to directory where output CSV should be written. "
+    "Default: %(default)s")
 parser.add_argument(
    "--output-csv-filename",
    default=CLASS1_DATA_CSV_FILENAME,
-    help="Name of combined CSV file")
+    help="Name of combined CSV file. Default: %(default)s")
 parser.add_argument(
    "--extra-dataset-csv-path",

--- a/script/create-iedb-class1-dataset.py
+++ b/script/create-iedb-class1-dataset.py
@@ -33,25 +33,27 @@ from mhcflurry.paths import CLASS1_DATA_DIRECTORY
 IEDB_SOURCE_FILENAME = "mhc_ligand_full.csv"
 PICKLE_OUTPUT_FILENAME = "iedb_human_class1_assay_datasets.pickle"
-parser = argparse.ArgumentParser()
+parser = argparse.ArgumentParser(usage=__doc__)
 parser.add_argument(
    "--input-csv",
    default=IEDB_SOURCE_FILENAME,
-    help="CSV file with IEDB's MHC binding data")
+    help="CSV file with IEDB's MHC binding data. Default: %(default)s")
 parser.add_argument(
    "--output-dir",
    default=CLASS1_DATA_DIRECTORY,
-    help="Directory to write output pickle file")
+    help="Directory to write output pickle file. Default: %(default)s")
 parser.add_argument(
    "--output-pickle-filename",
    default=PICKLE_OUTPUT_FILENAME,
-    help="Path to .pickle file containing dictionary of IEDB assay datasets")
+    help="Path to .pickle file containing dictionary of IEDB assay datasets. "
+    "Default: %(default)s")
 parser.add_argument(
    "--alleles",
+    metavar="ALLELE",
    nargs="+",
    default=[],
    help="Restrict dataset to specified alleles")

--- a/script/mhcflurry-train-class1-allele-specific-models.py
+++ b/script/mhcflurry-train-class1-allele-specific-models.py
@@ -61,12 +61,14 @@ parser = argparse.ArgumentParser()
 parser.add_argument(
    "--binding-data-csv",
    default=CSV_PATH,
-    help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns")
+    help="CSV file with 'mhc', 'peptide', 'peptide_length', 'meas' columns. "
+    "Default: %(default)s")
 parser.add_argument(
    "--output-dir",
    default=CLASS1_MODEL_DIRECTORY,
-    help="Output directory for allele-specific predictor HDF weights files")
+    help="Output directory for allele-specific predictor HDF weights files. "
+    "Default: %(default)s")
 parser.add_argument(
    "--overwrite",
@@ -77,11 +79,15 @@ parser.add_argument(
 parser.add_argument(
    "--min-samples-per-allele",
    default=5,
-    help="Don't train predictors for alleles with fewer samples than this",
+    metavar="N",
+    help="Don't train predictors for alleles with fewer than N samples. "
+    "Default: %(default)s",
    type=int)
 parser.add_argument(
    "--alleles",
+    metavar="ALLELE",
+    help="Alleles to train",
    default=[],
    nargs="+",
    type=normalize_allele_name)

--- a/setup.py
+++ b/setup.py
@@ -64,7 +64,7 @@ if __name__ == '__main__':
            'numpy>=1.7',
            'pandas>=0.13.1',
            'appdirs',
-            'theano',
+            'theano>=0.8.2',
            'keras',
            'fancyimpute',
            'scikit-learn',