From 2dcc52c5008f71ca92cc29479c9910da48c78249 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Fri, 24 Jan 2020 21:09:31 -0500
Subject: [PATCH] fix: drop parallelism args from presentation training; check requested PMIDs against unfiltered hits

---
 downloads-generation/models_class1_presentation/GENERATE.sh   | 4 +---
 .../models_class1_presentation/make_benchmark.py              | 4 ++--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/downloads-generation/models_class1_presentation/GENERATE.sh b/downloads-generation/models_class1_presentation/GENERATE.sh
index d8998f41..369f70f1 100755
--- a/downloads-generation/models_class1_presentation/GENERATE.sh
+++ b/downloads-generation/models_class1_presentation/GENERATE.sh
@@ -92,9 +92,7 @@ mhcflurry-class1-train-presentation-models \
     --affinity-predictor "$(mhcflurry-downloads path models_class1_pan)/models.combined" \
     --cleavage-predictor-with-flanks "$(mhcflurry-downloads path models_class1_cleavage)/models.selected" \
     --cleavage-predictor-without-flanks "$(mhcflurry-downloads path models_class1_cleavage_variants)/models.selected.no_flank" \
-    --out-models-dir "$(pwd)/models" \
-    --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
-    $PARALLELISM_ARGS
+    --out-models-dir "$(pwd)/models"
 
 cp $SCRIPT_ABSOLUTE_PATH .
 bzip2 -f "$LOG"
diff --git a/downloads-generation/models_class1_presentation/make_benchmark.py b/downloads-generation/models_class1_presentation/make_benchmark.py
index 532474a6..6a14d7b6 100644
--- a/downloads-generation/models_class1_presentation/make_benchmark.py
+++ b/downloads-generation/models_class1_presentation/make_benchmark.py
@@ -53,6 +53,7 @@ parser.add_argument(
 def run():
     args = parser.parse_args(sys.argv[1:])
     hit_df = pandas.read_csv(args.hits)
+    original_pmids = hit_df.pmid.unique()
     numpy.testing.assert_equal(hit_df.hit_id.nunique(), len(hit_df))
     hit_df = hit_df.loc[
         (hit_df.mhc_class == "I") &
@@ -74,8 +75,7 @@ def run():
         assert not (args.only_pmid and args.exclude_pmid)
 
         pmids = list(args.only_pmid) + list(args.exclude_pmid)
-        hit_pmids = hit_df.pmid.unique()
-        missing = [pmid for pmid in pmids if pmid not in hit_pmids]
+        missing = [pmid for pmid in pmids if pmid not in original_pmids]
         assert not missing, missing
 
         mask = hit_df.pmid.isin(pmids)
-- 
GitLab