From 836456d8fe16224cd80039722ba3afc78a524040 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Thu, 25 Jan 2018 19:34:25 -0500
Subject: [PATCH] add models_class1 download (now with mass-spec)

---
 .../models_class1/GENERATE.sh                 | 45 +++++++++++++++++
 downloads-generation/models_class1/README.md  |  9 ++++
 .../models_class1/hyperparameters.yaml        | 50 +++++++++++++++++++
 3 files changed, 104 insertions(+)
 create mode 100755 downloads-generation/models_class1/GENERATE.sh
 create mode 100644 downloads-generation/models_class1/README.md
 create mode 100644 downloads-generation/models_class1/hyperparameters.yaml

diff --git a/downloads-generation/models_class1/GENERATE.sh b/downloads-generation/models_class1/GENERATE.sh
new file mode 100755
index 00000000..a22249fc
--- /dev/null
+++ b/downloads-generation/models_class1/GENERATE.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Train standard MHCflurry Class I models.
+# Calls mhcflurry-class1-train-allele-specific-models on curated training data
+# using the hyperparameters in "hyperparameters.yaml".
+# Output: ${TMPDIR-/tmp}/mhcflurry-downloads-generation/models_class1.tar.bz2
+set -e
+set -x
+
+DOWNLOAD_NAME=models_class1
+SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
+SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
+
+mkdir -p "$SCRATCH_DIR"
+rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+# Send stdout and stderr to a logfile included with the archive.
+exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+
+# Log some environment info
+date
+pip freeze
+git status
+
+cd "$SCRATCH_DIR/$DOWNLOAD_NAME"  # quoted: TMPDIR may contain whitespace
+
+mkdir models
+
+cp "$SCRIPT_DIR/hyperparameters.yaml" .
+
+time mhcflurry-class1-train-allele-specific-models \
+    --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
+    --hyperparameters hyperparameters.yaml \
+    --out-models-dir models \
+    --percent-rank-calibration-num-peptides-per-length 1000000 \
+    --min-measurements-per-allele 75
+
+cp "$SCRIPT_ABSOLUTE_PATH" .  # ship this script inside the archive
+bzip2 LOG.txt
+tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
+
+echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
diff --git a/downloads-generation/models_class1/README.md b/downloads-generation/models_class1/README.md
new file mode 100644
index 00000000..ce784b40
--- /dev/null
+++ b/downloads-generation/models_class1/README.md
@@ -0,0 +1,9 @@
+# Class I allele-specific models (ensemble)
+
+This download contains trained MHC Class I MHCflurry models.
+
+To generate this download run:
+
+```
+./GENERATE.sh
+```
\ No newline at end of file
diff --git a/downloads-generation/models_class1/hyperparameters.yaml b/downloads-generation/models_class1/hyperparameters.yaml
new file mode 100644
index 00000000..545f8d1d
--- /dev/null
+++ b/downloads-generation/models_class1/hyperparameters.yaml
@@ -0,0 +1,50 @@
+[{
+##########################################
+# ENSEMBLE SIZE
+##########################################
+"n_models": 8,
+
+##########################################
+# OPTIMIZATION
+##########################################
+"max_epochs": 500,
+"patience": 20,
+"early_stopping": true,
+"validation_split": 0.1,
+"minibatch_size": 512,
+"loss": "custom:mse_with_inequalities",
+
+##########################################
+# RANDOM NEGATIVE PEPTIDES
+##########################################
+"random_negative_rate": 0.2,
+"random_negative_constant": 25,
+"random_negative_affinity_min": 20000.0,
+"random_negative_affinity_max": 50000.0,
+
+##########################################
+# PEPTIDE REPRESENTATION
+##########################################
+# One of "one-hot", "embedding", or "BLOSUM62".
+"peptide_amino_acid_encoding": "BLOSUM62",
+"use_embedding": false, # maintained for backward compatibility
+"embedding_output_dim": 8, # only used if using embedding
+"kmer_size": 15,
+
+##########################################
+# NEURAL NETWORK ARCHITECTURE
+##########################################
+"locally_connected_layers": [
+    {
+        "filters": 8,
+        "activation": "tanh",
+        "kernel_size": 3
+    }
+],
+"activation": "relu",
+"output_activation": "sigmoid",
+"layer_sizes": [16],
+"dense_layer_l1_regularization": 0.001,
+"batch_normalization": false,
+"dropout_probability": 0.0,
+}]
-- 
GitLab