Skip to content
Snippets Groups Projects
Commit e65d7a79 authored by Tim O'Donnell
Browse files

move

parent 7a688ba5
No related merge requests found
......@@ -5,7 +5,7 @@
# Exit as soon as a command returns a non-zero status, and trace every
# command to stderr as it is executed.
set -e
set -x
# NOTE(review): DOWNLOAD_NAME is assigned twice back to back in this view.
# Executed in order, the second assignment is the one that takes effect.
DOWNLOAD_NAME=models_class1_pan
DOWNLOAD_NAME=models_class1_pan_unselected
# Scratch root: $TMPDIR when that variable is set (even if empty),
# otherwise /tmp.
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
# Absolute path of this script: its directory is resolved with cd + pwd and
# the file's basename is appended. SCRIPT_DIR is the containing directory.
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
......@@ -30,46 +30,29 @@ mkdir models
# Copy the hyperparameter generator into the current directory and save its
# stdout as hyperparameters.yaml, which the training commands read later.
cp $SCRIPT_DIR/generate_hyperparameters.py .
python generate_hyperparameters.py > hyperparameters.yaml
# Copy write_validation_data.py as well; it is not invoked in the lines
# visible here.
cp $SCRIPT_DIR/write_validation_data.py .
# GPU count = number of lines printed by `nvidia-smi -L` (stderr discarded).
# NOTE(review): no `pipefail` is set in the visible lines, so the pipeline's
# status is that of `wc`; the `|| GPUS=0` fallback only applies if the
# pipeline as a whole reports failure.
GPUS=$(nvidia-smi -L 2> /dev/null | wc -l) || GPUS=0
echo "Detected GPUS: $GPUS"
# Number of processors currently online, as reported by getconf.
PROCESSORS=$(getconf _NPROCESSORS_ONLN)
echo "Detected processors: $PROCESSORS"
# Exported so that Python child processes write stdout/stderr unbuffered.
export PYTHONUNBUFFERED=1
# Value forwarded to --verbosity in both training invocations below.
VERBOSITY=1
# First training run: reads the "with_mass_spec" curated training data and
# writes models to models-unselected.with_mass_spec. Input locations are
# resolved at run time through `mhcflurry-downloads path <name>`.
# --num-jobs and --gpus both receive the detected GPU count ($GPUS).
mhcflurry-class1-train-pan-allele-models \
--data "$(mhcflurry-downloads path data_curated)/curated_training_data.with_mass_spec.csv.bz2" \
--allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
--pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
--held-out-measurements-per-allele-fraction-and-max 0.25 100 \
--ensemble-size 4 \
--hyperparameters hyperparameters.yaml \
--out-models-dir models-unselected.with_mass_spec \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--verbosity $VERBOSITY \
--num-jobs $GPUS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
# Second training run: same options as above except that it reads the
# "no_mass_spec" data file and writes to models-unselected.no_mass_spec.
mhcflurry-class1-train-pan-allele-models \
--data "$(mhcflurry-downloads path data_curated)/curated_training_data.no_mass_spec.csv.bz2" \
--allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
--pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
--held-out-measurements-per-allele-fraction-and-max 0.25 100 \
--ensemble-size 4 \
--hyperparameters hyperparameters.yaml \
--out-models-dir models-unselected.no_mass_spec \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--verbosity $VERBOSITY \
--num-jobs $GPUS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
# Run the pan-allele training command once per data variant. ${kind} selects
# both the curated training data file (curated_training_data.<kind>.csv.bz2)
# and the output directory (models.<kind>). Verbosity is fixed at 0 here, and
# --num-jobs / --gpus both receive the detected GPU count ($GPUS).
for kind in with_mass_spec no_mass_spec
do
mhcflurry-class1-train-pan-allele-models \
--data "$(mhcflurry-downloads path data_curated)/curated_training_data.${kind}.csv.bz2" \
--allele-sequences "$(mhcflurry-downloads path allele_sequences)/allele_sequences.csv" \
--pretrain-data "$(mhcflurry-downloads path random_peptide_predictions)/predictions.csv.bz2" \
--held-out-measurements-per-allele-fraction-and-max 0.25 100 \
--ensemble-size 4 \
--hyperparameters hyperparameters.yaml \
--out-models-dir models.${kind} \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--verbosity 0 \
--num-jobs $GPUS --max-tasks-per-worker 1 --gpus $GPUS --max-workers-per-gpu 1
done
# Keep a copy of this script next to the generated outputs. The path is
# quoted so that a directory name containing spaces is passed as one argument.
cp "$SCRIPT_ABSOLUTE_PATH" .
# Compress the main log, then each per-worker log.
bzip2 LOG.txt
# Iterate with a glob instead of parsing `ls` output, so file names are never
# word-split or re-globbed.
for i in LOG-worker.*.txt ; do
  # When nothing matches, the pattern is left as a literal string; skip it so
  # bzip2 is not run on a non-existent file (which would abort under set -e).
  [ -e "$i" ] || continue
  bzip2 "$i"
done
# Pack every entry matched by `*` in the current directory into a
# bzip2-compressed tarball written to the parent directory.
# NOTE(review): the echo reports the archive under $SCRATCH_DIR; this assumes
# the parent of the current directory is $SCRATCH_DIR (the cd is not visible
# here) — confirm.
tar -cjf "../${DOWNLOAD_NAME}.with_unselected.tar.bz2" *
echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.with_unselected.tar.bz2"
# NOTE(review): `xargs -I {}` runs tar once per listed entry, and `tar -c`
# creates the archive from scratch each time, so after this pipeline the
# archive holds only the last entry that did not match "models-unselected".
ls -d * | grep -v models-unselected | xargs -I {} tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" {}
# Writes the same archive path again, this time from every entry matched by `*`.
tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
echo "Created archive: $SCRATCH_DIR/${DOWNLOAD_NAME}.tar.bz2"
......@@ -152,21 +152,21 @@ def run(argv=sys.argv[1:]):
if num_folds <= 1:
raise ValueError("Too few folds: ", num_folds)
#df = df.loc[
# (df.peptide.str.len() >= min_peptide_length) &
# (df.peptide.str.len() <= max_peptide_length)
#]
#print("Subselected to %d-%dmers: %s" % (
# min_peptide_length, max_peptide_length, str(df.shape)))
df = df.loc[
(df.peptide.str.len() >= min_peptide_length) &
(df.peptide.str.len() <= max_peptide_length)
]
print("Subselected to %d-%dmers: %s" % (
min_peptide_length, max_peptide_length, str(df.shape)))
print("Num folds: ", num_folds, "fraction included:")
print(df[fold_cols].mean())
# Allele names in data are assumed to be already normalized.
#df = df.loc[df.allele.isin(alleles)].dropna()
#print("Subselected to supported alleles: %s" % str(df.shape))
df = df.loc[df.allele.isin(alleles)].dropna()
print("Subselected to supported alleles: %s" % str(df.shape))
#print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles)))
print("Selected %d alleles: %s" % (len(alleles), ' '.join(alleles)))
metadata_dfs["model_selection_data"] = df
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment