Skip to content
Snippets Groups Projects
Commit e65d7a79 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

move

parent 7a688ba5
No related merge requests found
...@@ -5,7 +5,7 @@ ...@@ -5,7 +5,7 @@
# Reconstructed post-commit (right-hand) side of this diff hunk: the page
# scrape fused the old and new columns onto single lines. Behavior: abort on
# error, trace commands, and locate this script and its scratch directory.
set -e
set -x

DOWNLOAD_NAME=models_class1_pan_unselected
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
# Absolute path to this script, resilient to being invoked via a relative path.
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
...@@ -30,46 +30,29 @@ mkdir models ...@@ -30,46 +30,29 @@ mkdir models
# Reconstructed post-commit (right-hand) side of this diff hunk: the scrape
# fused old and new columns; only the new revision is kept. The commit
# replaced the two copy-pasted training invocations with one loop over the
# two curated-training-data variants, and renamed the output dirs/archive.
cp "$SCRIPT_DIR/generate_hyperparameters.py" .
python generate_hyperparameters.py > hyperparameters.yaml

# Count GPUs; nvidia-smi is absent on CPU-only hosts, so fall back to 0
# (the || also keeps `set -e` from aborting the script there).
GPUS=$(nvidia-smi -L 2> /dev/null | wc -l) || GPUS=0
echo "Detected GPUS: $GPUS"

PROCESSORS=$(getconf _NPROCESSORS_ONLN)
echo "Detected processors: $PROCESSORS"

# Train one unselected pan-allele ensemble per training-data variant.
for kind in with_mass_spec no_mass_spec
do
    mhcflurry-class1-train-pan-allele-models \
        --data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
        --allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
        --pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
        --held-out-measurements-per-allele-fraction-and-max 0.25 100 \
        --ensemble-size 4 \
        --hyperparameters hyperparameters.yaml \
        --out-models-dir "models.${kind}" \
        --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
        --verbosity 0 \
        --num-jobs "$GPUS" --max-tasks-per-worker 1 --gpus "$GPUS" --max-workers-per-gpu 1
done

# Archive the run: script copy, compressed logs, then everything into one
# tarball. Iterate a glob instead of parsing `ls` output (ShellCheck SC2045)
# and quote filenames; skip cleanly when no worker logs exist.
cp "$SCRIPT_ABSOLUTE_PATH" .
bzip2 LOG.txt
for i in LOG-worker.*.txt ; do
    [ -e "$i" ] || continue
    bzip2 "$i"
done
tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *

echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
...@@ -152,21 +152,21 @@ def run(argv=sys.argv[1:]): ...@@ -152,21 +152,21 @@ def run(argv=sys.argv[1:]):
if num_folds <= 1: if num_folds <= 1:
raise ValueError("Too few folds: ", num_folds) raise ValueError("Too few folds: ", num_folds)
#df = df.loc[ df = df.loc[
# (df.peptide.str.len() >= min_peptide_length) & (df.peptide.str.len() >= min_peptide_length) &
# (df.peptide.str.len() <= max_peptide_length) (df.peptide.str.len() <= max_peptide_length)
#] ]
#print("Subselected to %d-%dmers: %s" % ( print("Subselected to %d-%dmers: %s" % (
# min_peptide_length, max_peptide_length, str(df.shape))) min_peptide_length, max_peptide_length, str(df.shape)))
print("Num folds: ", num_folds, "fraction included:") print("Num folds: ", num_folds, "fraction included:")
print(df[fold_cols].mean()) print(df[fold_cols].mean())
# Allele names in data are assumed to be already normalized. # Allele names in data are assumed to be already normalized.
#df = df.loc[df.allele.isin(alleles)].dropna() df = df.loc[df.allele.isin(alleles)].dropna()
#print("Subselected to supported alleles: %s" % str(df.shape)) print("Subselected to supported alleles: %s" % str(df.shape))
#print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles))) print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles)))
metadata_dfs["model_selection_data"] = df metadata_dfs["model_selection_data"] = df
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment