diff --git a/downloads-generation/models_class1_allele_specific_single_kim2014_only/GENERATE.sh b/downloads-generation/models_class1_allele_specific_single_kim2014_only/GENERATE.sh
new file mode 100644
index 0000000000000000000000000000000000000000..3d7eb35498d406522bd199d440bff48e43be6bf2
--- /dev/null
+++ b/downloads-generation/models_class1_allele_specific_single_kim2014_only/GENERATE.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+if [[ $# -eq 0 ]] ; then
+    echo 'WARNING: This script is intended to be called with additional arguments to pass to mhcflurry-class1-allele-specific-cv-and-train'
+    echo 'At minimum you probably want to pass --dask-scheduler <IP:PORT> as training many models on one node is extremely '
+    echo 'slow.'
+fi
+
+set -e
+set -x
+
+DOWNLOAD_NAME=models_class1_allele_specific_single_kim2014_only
+SCRATCH_DIR=/tmp/mhcflurry-downloads-generation
+SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
+SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
+
+mkdir -p "$SCRATCH_DIR"
+rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
+mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+# Send stdout and stderr to a logfile included with the archive.
+exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
+exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)
+
+# Log some environment info
+date
+pip freeze
+git rev-parse HEAD
+git status
+
+cd "$SCRATCH_DIR/$DOWNLOAD_NAME"
+
+mkdir models
+
+cp "$SCRIPT_DIR/models.py" "$SCRIPT_DIR/imputer.json" .
+python models.py > models.json
+
+time mhcflurry-class1-allele-specific-cv-and-train \
+    --model-architectures models.json \
+    --imputer-description imputer.json \
+    --train-data "$(mhcflurry-downloads path data_kim2014)/bdata.2009.mhci.public.1.txt" \
+    --test-data "$(mhcflurry-downloads path data_kim2014)/bdata.2013.mhci.public.blind.1.txt" \
+    --min-samples-per-allele 70 \
+    --out-cv-results cv.csv \
+    --out-production-results production.csv \
+    --out-models models \
+    --verbose \
+    "$@"
+
+cp "$SCRIPT_ABSOLUTE_PATH" .
+tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
+
+echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
diff --git a/downloads-generation/models_class1_allele_specific_single_kim2014_only/README.md b/downloads-generation/models_class1_allele_specific_single_kim2014_only/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..6cecfebb82cf745063e5244b36877980801cb818
--- /dev/null
+++ b/downloads-generation/models_class1_allele_specific_single_kim2014_only/README.md
@@ -0,0 +1,4 @@
+# Class I allele specific models (single) trained and tested in Kim 2014 dataset
+
+This is a reimplementation of the analysis in [Predicting Peptide-MHC Binding Affinities With Imputed Training Data](http://biorxiv.org/content/early/2016/05/22/054775).
+
diff --git a/downloads-generation/models_class1_allele_specific_single_kim2014_only/imputer.json b/downloads-generation/models_class1_allele_specific_single_kim2014_only/imputer.json
new file mode 100644
index 0000000000000000000000000000000000000000..f761431606102dd65adadb5dcdcf036a966bee9c
--- /dev/null
+++ b/downloads-generation/models_class1_allele_specific_single_kim2014_only/imputer.json
@@ -0,0 +1,8 @@
+{
+    "imputation_method_name": "mice",
+    "n_burn_in": 5,
+    "n_imputations": 50,
+    "n_nearest_columns": 25,
+    "min_observations_per_peptide": 5,
+    "min_observations_per_allele": 100
+}
diff --git a/downloads-generation/models_class1_allele_specific_single_kim2014_only/models.py b/downloads-generation/models_class1_allele_specific_single_kim2014_only/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..6375cd4510bfebadd4df529a6884c1eb1632f162
--- /dev/null
+++ b/downloads-generation/models_class1_allele_specific_single_kim2014_only/models.py
@@ -0,0 +1,16 @@
+import sys
+from mhcflurry.class1_allele_specific.train import HYPERPARAMETER_DEFAULTS
+import json
+
+models = HYPERPARAMETER_DEFAULTS.models_grid(
+    #impute=[False, True],
+    impute=[False],
+    activation=["tanh"],
+    layer_sizes=[[12], [64], [128]],
+    embedding_output_dim=[8, 32, 64],
+    dropout_probability=[0, .1, .25],
+    # fraction_negative=[0, .1, .2],
+    n_training_epochs=[250])
+
+sys.stderr.write("Models: %d\n" % len(models))
+print(json.dumps(models, indent=4))