Skip to content
Snippets Groups Projects
Commit 836456d8 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

add models_class1 download (now with mass-spec)

parent bbfd350d
No related merge requests found
#!/bin/bash
#
# Train standard MHCflurry Class I models.
#
# Calls mhcflurry-class1-train-allele-specific-models on curated training data
# using the hyperparameters in "hyperparameters.yaml", which is read from the
# directory containing this script.
#
# Everything is staged in
#   ${TMPDIR-/tmp}/mhcflurry-downloads-generation/models_class1
# and then packed into models_class1.tar.bz2 in the parent of that directory.
#
set -e  # abort on the first failing command
set -x  # trace commands; the trace goes to stderr and so ends up in LOG.txt

DOWNLOAD_NAME=models_class1
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
# Resolve this script's absolute location before any `cd`, so it can still be
# found (and copied into the archive) after the working directory changes.
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")

# Start from an empty staging directory; output of any previous run is removed.
mkdir -p "$SCRATCH_DIR"
rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME"
mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME"

# Send stdout and stderr to a logfile included with the archive.
exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt")
exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2)

# Log some environment info. `git status` runs in the directory the script was
# invoked from, because the `cd` into the staging directory happens afterwards.
date
pip freeze
git status

# Paths are quoted so that a TMPDIR or script location containing whitespace
# or glob characters does not get word-split.
cd "$SCRATCH_DIR/$DOWNLOAD_NAME"

mkdir models

# Keep a copy of the hyperparameters used alongside the trained models.
cp "$SCRIPT_DIR/hyperparameters.yaml" .

time mhcflurry-class1-train-allele-specific-models \
  --data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
  --hyperparameters hyperparameters.yaml \
  --out-models-dir models \
  --percent-rank-calibration-num-peptides-per-length 1000000 \
  --min-measurements-per-allele 75

# Ship the generating script itself inside the archive.
cp "$SCRIPT_ABSOLUTE_PATH" .

# bzip2 replaces LOG.txt with LOG.txt.bz2.
# NOTE(review): output produced after this point is presumably not part of the
# archived log - confirm if the tail of the log matters.
bzip2 LOG.txt

# The archive is written to the parent directory, so it does not include itself.
tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *

echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
# Class I allele-specific models (ensemble)
This download contains trained MHC Class I MHCflurry models.
To generate this download run:
```
./GENERATE.sh
```
\ No newline at end of file
[{
##########################################
# ENSEMBLE SIZE
##########################################
"n_models": 8,
##########################################
# OPTIMIZATION
##########################################
"max_epochs": 500,
"patience": 20,
"early_stopping": true,
"validation_split": 0.1,
"minibatch_size": 512,
"loss": "custom:mse_with_inequalities",
##########################################
# RANDOM NEGATIVE PEPTIDES
##########################################
"random_negative_rate": 0.2,
"random_negative_constant": 25,
"random_negative_affinity_min": 20000.0,
"random_negative_affinity_max": 50000.0,
##########################################
# PEPTIDE REPRESENTATION
##########################################
# One of "one-hot", "embedding", or "BLOSUM62".
"peptide_amino_acid_encoding": "BLOSUM62",
"use_embedding": false, # maintained for backward compatibility
"embedding_output_dim": 8, # only used if using embedding
"kmer_size": 15,
##########################################
# NEURAL NETWORK ARCHITECTURE
##########################################
"locally_connected_layers": [
{
"filters": 8,
"activation": "tanh",
"kernel_size": 3
}
],
"activation": "relu",
"output_activation": "sigmoid",
"layer_sizes": [16],
"dense_layer_l1_regularization": 0.001,
"batch_normalization": false,
"dropout_probability": 0.0,
}]
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment