Skip to content
Snippets Groups Projects
Commit 52909241 authored by Tim O'Donnell's avatar Tim O'Donnell
Browse files

Rename data_mass_spec_benchmark to data_predictions

parent e14c82ac
No related branches found
No related tags found
No related merge requests found
Showing
with 207 additions and 19 deletions
......@@ -70,7 +70,7 @@ do
cp $SCRIPT_DIR/make_benchmark.py .
time python make_benchmark.py \
--hits "$(pwd)/hits_with_tpm.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
--decoys-per-hit 110 \
--exclude-train-data "$EXCLUDE_TRAIN_DATA" \
--only-format MONOALLELIC \
......@@ -95,7 +95,7 @@ do
cp $SCRIPT_DIR/make_benchmark.py .
time python make_benchmark.py \
--hits "$(pwd)/hits_with_tpm.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
--decoys-per-hit 110 \
--exclude-train-data "$EXCLUDE_TRAIN_DATA" \
--only-format MULTIALLELIC \
......
......@@ -77,7 +77,7 @@ def run():
if 'netmhcpan4.ba' in args.predictors:
precomputed_dfs['netmhcpan4.ba'] = load_results(
get_path("data_mass_spec_benchmark", "predictions/all.netmhcpan4.ba"),
get_path("data_predictions", "predictions/all.netmhcpan4.ba"),
result_df=pandas.DataFrame(
dtype=numpy.float32,
index=peptides,
......@@ -87,7 +87,7 @@ def run():
if 'netmhcpan4.el' in args.predictors:
precomputed_dfs['netmhcpan4.el'] = load_results(
get_path("data_mass_spec_benchmark", "predictions/all.netmhcpan4.el"),
get_path("data_predictions", "predictions/all.netmhcpan4.el"),
result_df=pandas.DataFrame(
dtype=numpy.float32,
index=peptides,
......@@ -96,7 +96,7 @@ def run():
if 'mixmhcpred' in args.predictors:
precomputed_dfs['mixmhcpred'] = load_results(
get_path("data_mass_spec_benchmark", "predictions/all.mixmhcpred"),
get_path("data_predictions", "predictions/all.mixmhcpred"),
result_df=pandas.DataFrame(
dtype=numpy.float32,
index=peptides,
......
......@@ -16,7 +16,7 @@
#
# SECOND ARGUMENT: whether to reuse predictions from existing downloaded data
# reuse-all - reuse predictions and peptide / allele lists from existing
# downloaded data_mass_spec_benchmark.
# downloaded data_predictions.
# reuse-none - fully self-contained run; do not reuse anything.
# reuse-predictions - reuse predictions but not peptide or allele lists. Any
# new peptides not already included will be run.
......@@ -26,7 +26,7 @@
set -e
set -x
DOWNLOAD_NAME=data_mass_spec_benchmark
DOWNLOAD_NAME=data_predictions
SCRATCH_DIR=${TMPDIR-/tmp}/mhcflurry-downloads-generation
SCRIPT_ABSOLUTE_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/$(basename "${BASH_SOURCE[0]}")"
SCRIPT_DIR=$(dirname "$SCRIPT_ABSOLUTE_PATH")
......
......@@ -79,7 +79,7 @@ else
cp $SCRIPT_DIR/make_benchmark.py .
time python make_benchmark.py \
--hits "$(pwd)/hits_with_tpm.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
--proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
--decoys-per-hit 2 \
--exclude-pmid 31844290 31495665 31154438 \
--only-format MULTIALLELIC \
......@@ -94,14 +94,14 @@ else
mhcflurry-class1-train-presentation-models \
--data "$(pwd)/train_data.csv.bz2" \
--affinity-predictor "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
--processing-predictor-with-flanks "$(mhcflurry-downloads path models_class1_processing)/models" \
--processing-predictor-without-flanks "$(mhcflurry-downloads path models_class1_processing_variants)/models.selected.no_flank" \
--processing-predictor-with-flanks "$(mhcflurry-downloads path models_class1_processing)/models.selected.with_flanks" \
--processing-predictor-without-flanks "$(mhcflurry-downloads path models_class1_processing)/models.selected.no_flank" \
--out-models-dir "$(pwd)/models"
fi
cp "$(mhcflurry-downloads path models_class1_pan)/models.combined/train_data.csv.bz2" models/affinity_predictor_train_data.csv.bz2
cp "$(mhcflurry-downloads path models_class1_processing)/models/train_data.csv.bz2" models/processing_predictor_train_data.csv.bz2
cp "$(mhcflurry-downloads path models_class1_processing_variants)/models.selected.no_flank/train_data.csv.bz2" models/processing_predictor_no_flank_train_data.csv.bz2
cp "$(mhcflurry-downloads path models_class1_processing)/models.selected.with_flanks/train_data.csv.bz2" models/processing_predictor_train_data.csv.bz2
cp "$(mhcflurry-downloads path models_class1_processing)/models.selected.no_flank/train_data.csv.bz2" models/processing_predictor_no_flank_train_data.csv.bz2
cp $SCRIPT_ABSOLUTE_PATH .
bzip2 -f "$LOG"
......
......@@ -91,8 +91,8 @@ else
cp $SCRIPT_DIR/make_train_data.py .
time python make_train_data.py \
--hits "$(pwd)/hits_with_tpm.csv.bz2" \
--predictions "$(mhcflurry-downloads path data_mass_spec_benchmark)/predictions/all.mhcflurry.combined" \
--proteome-peptides "$(mhcflurry-downloads path data_mass_spec_benchmark)/proteome_peptides.all.csv.bz2" \
--predictions "$(mhcflurry-downloads path data_predictions)/predictions/all.mhcflurry.combined" \
--proteome-peptides "$(mhcflurry-downloads path data_predictions)/proteome_peptides.all.csv.bz2" \
--ppv-multiplier 100 \
--hit-multiplier-to-take 2 \
--out "$(pwd)/train_data.csv"
......
../models_class1_processing/cluster_submit_script_header.mssm_hpc.lsf
\ No newline at end of file
#!/bin/bash
# LSF (bsub) job submission header template for an MSSM HPC cluster.
# The brace placeholders ({work_item_num}, {work_dir}) are presumably
# substituted by the submitting code before this header is passed to bsub
# — TODO confirm against the script that consumes this template.
#BSUB -J MHCf-{work_item_num} # Job name
#BSUB -P acc_nkcancer # allocation account or Unix group
#BSUB -q gpu # queue
#BSUB -R rusage[ngpus_excl_p=1] # 1 exclusive GPU
#BSUB -R span[hosts=1] # one node
#BSUB -n 1 # number of compute cores
#BSUB -W 10:00 # walltime in HH:MM
#BSUB -R rusage[mem=20000] # mb memory requested
#BSUB -o {work_dir}/%J.stdout # output log (%J : JobID)
#BSUB -eo {work_dir}/STDERR # error log
#BSUB -L /bin/bash # Initialize the execution environment
#
# Fail fast on any error and echo each command, so failed jobs are easy
# to debug from the log.
set -e
set -x
echo "Subsequent stderr output redirected to stdout" >&2
# Merge stderr into stdout so the entire job log lands in a single file.
exec 2>&1
# Node-local scratch space, unique per work item.
export TMPDIR=/local/JOBS/mhcflurry-{work_item_num}
export PATH=$HOME/.conda/envs/py36b/bin/:$PATH
# Unbuffered Python output so log lines appear promptly; KMP_SETTINGS=1
# makes the OpenMP runtime print its configuration at startup.
export PYTHONUNBUFFERED=1
export KMP_SETTINGS=1
free -m
module add cuda/10.0.130
module list
# Point compiler and loader paths at a user-local cuDNN install (the
# cuda module apparently does not provide it — NOTE(review): confirm).
export CUDNN_HOME=/hpc/users/odonnt02/oss/cudnn/cuda
export LD_LIBRARY_PATH=$CUDNN_HOME/lib64:$LD_LIBRARY_PATH
export CMAKE_LIBRARY_PATH=$CUDNN_HOME/lib64:$CMAKE_LIBRARY_PATH
export INCLUDE_PATH=$CUDNN_HOME/include:$INCLUDE_PATH
export C_INCLUDE_PATH=$CUDNN_HOME/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$CUDNN_HOME/include:$CPLUS_INCLUDE_PATH
export CMAKE_INCLUDE_PATH=$CUDNN_HOME/include:$CMAKE_INCLUDE_PATH
# Sanity check: confirm TensorFlow can actually see the GPU before any
# real work starts.
python -c 'import tensorflow as tf ; print("GPU AVAILABLE" if tf.test.is_gpu_available() else "GPU NOT AVAILABLE")'
env
cd {work_dir}
../models_class1_processing/generate_hyperparameters.py
\ No newline at end of file
"""
Generate grid of hyperparameters.

Writes the grid to stdout as YAML and prints its size to stderr.
"""
from __future__ import print_function

from copy import deepcopy
from itertools import product
from sys import stdout, stderr

from yaml import dump

# Fixed settings shared by every grid point; swept fields are overridden
# per point in hyperparameters_grid() below.
base_hyperparameters = dict(
    convolutional_filters=64,
    convolutional_kernel_size=8,
    convolutional_kernel_l1_l2=(0.00, 0.0),
    flanking_averages=True,
    n_flank_length=15,
    c_flank_length=15,
    post_convolutional_dense_layer_sizes=[],
    minibatch_size=512,
    dropout_rate=0.5,
    convolutional_activation="relu",
    patience=20,
    learning_rate=0.001)

grid = []


def hyperparameters_grid():
    """Yield one hyperparameter dict per point of the search grid.

    Each yielded dict is a deep copy of ``base_hyperparameters`` with the
    swept fields overridden, so mutating one grid point cannot affect
    another (or the base dict's mutable values).
    """
    # itertools.product iterates in exactly the same order as the
    # equivalent nested for-loops, so grid ordering is unchanged.
    sweep = product(
        [0.001],                      # learning_rate
        ["tanh", "relu"],             # convolutional_activation
        [256, 512],                   # convolutional_filters
        [True],                       # flanking_averages
        [11, 13, 15, 17],             # convolutional_kernel_size
        [0.0, 1e-6],                  # l1 penalty (l2 fixed at 0.0)
        [[8], [16]],                  # post-convolutional dense layer sizes
        [0.3, 0.5])                   # dropout_rate
    for (learning_rate, convolutional_activation, convolutional_filters,
            flanking_averages, convolutional_kernel_size, l1,
            dense_layer_sizes, dropout_rate) in sweep:
        new = deepcopy(base_hyperparameters)
        new["learning_rate"] = learning_rate
        new["convolutional_activation"] = convolutional_activation
        new["convolutional_filters"] = convolutional_filters
        new["flanking_averages"] = flanking_averages
        new["convolutional_kernel_size"] = convolutional_kernel_size
        new["convolutional_kernel_l1_l2"] = (l1, 0.0)
        new["post_convolutional_dense_layer_sizes"] = dense_layer_sizes
        new["dropout_rate"] = dropout_rate
        yield new


# Deduplicate while preserving order. Dicts are unhashable so a linear
# membership test is used; the grid is small enough that this is fine.
for new in hyperparameters_grid():
    if new not in grid:
        grid.append(new)

print("Hyperparameters grid size: %d" % len(grid), file=stderr)
dump(grid, stdout)
......@@ -48,7 +48,7 @@ releases:
url: https://github.com/openvax/mhcflurry/releases/download/1.6.0/data_evaluation.20200209.tar.bz2
default: false
- name: data_mass_spec_benchmark
- name: data_predictions
part_urls:
- https://github.com/openvax/mhcflurry/releases/download/pre-1.7.0/data_mass_spec_benchmark.20200428.tar.bz2.part.aa
- https://github.com/openvax/mhcflurry/releases/download/pre-1.7.0/data_mass_spec_benchmark.20200428.tar.bz2.part.ab
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment