diff --git a/downloads-generation/models_class1_pan_refined/GENERATE.sh b/downloads-generation/models_class1_pan_refined/GENERATE.sh
index 3cae8f14e4bf6b5aade9eb2607a6000a78d62353..4674cdc236e3399d6a45b514426647921a2f1269 100755
--- a/downloads-generation/models_class1_pan_refined/GENERATE.sh
+++ b/downloads-generation/models_class1_pan_refined/GENERATE.sh
@@ -89,11 +89,16 @@ fi
 
 # ********************************************************
 echo "Beginning production run"
-time python make_multiallelic_training_data.py \
-    --hits "$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2" \
-    --expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \
-    --decoys-per-hit 1 \
-    --out train.multiallelic.csv
+if [ -f "$SCRIPT_DIR/train.multiallelic.csv" ]; then
+    echo "Using existing multiallelic train data."
+    cp "$SCRIPT_DIR/train.multiallelic.csv" .
+else
+    time python make_multiallelic_training_data.py \
+        --hits "$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2" \
+        --expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \
+        --decoys-per-hit 1 \
+        --out train.multiallelic.csv
+fi
 
 ALLELE_LIST=$(bzcat "$MONOALLELIC_TRAIN" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq)
 ALLELE_LIST+=$(cat train.multiallelic.csv | cut -f 7 -d , | grep -v hla | uniq | tr ' ' '\n' | sort | uniq)