From 300ad9400705bdd77464dff82537d845185d0ea6 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Wed, 25 Sep 2019 12:22:41 -0400
Subject: [PATCH] Fix allele naming and read_excel sheet_name keyword in curate_by_pmid.py; update data_curated README

---
 downloads-generation/data_curated/README.md         | 5 +++--
 downloads-generation/data_curated/curate_by_pmid.py | 4 ++--
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/downloads-generation/data_curated/README.md b/downloads-generation/data_curated/README.md
index 7ac75d20..25227bee 100644
--- a/downloads-generation/data_curated/README.md
+++ b/downloads-generation/data_curated/README.md
@@ -1,9 +1,10 @@
 # Combined training data
 
-This download contains the data used to train the production class1 MHCflurry models. This data is derived from a recent [IEDB](http://www.iedb.org/home_v3.php) export as well as the data from [Kim 2014](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241). 
+This download contains the data used to train the production class1 MHCflurry models. This data is derived from a recent [IEDB](http://www.iedb.org/home_v3.php) export, the data from [Kim 2014](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241), and a number of other sources.
 
 To generate this download run:
 
 ```
+pip install -r requirements.txt  # only needed the first time you generate this download
 ./GENERATE.sh
-```
\ No newline at end of file
+```
diff --git a/downloads-generation/data_curated/curate_by_pmid.py b/downloads-generation/data_curated/curate_by_pmid.py
index 4d478b9d..5d62f62b 100755
--- a/downloads-generation/data_curated/curate_by_pmid.py
+++ b/downloads-generation/data_curated/curate_by_pmid.py
@@ -229,7 +229,7 @@ def handle_pmid_26992070(*filenames):
         for num in ["1", "2"]:
             allele_info[
                 "HLA-%s %s" % (gene, num)
-            ] = "HLA-" + gene + allele_info["HLA-%s %s" % (gene, num)]
+            ] = "HLA-" + gene + "*" + allele_info["HLA-%s %s" % (gene, num)]
     cell_line_to_allele = allele_info.apply(" ".join, axis=1)
 
     sheets = {}
@@ -585,7 +585,7 @@ def handle_pmid_31495665(filename):
         "MAPTAC_DRB3*01:01_dm-": "mixed",
     }
 
-    df = pandas.read_excel(filename, sheetname="DataS1B")
+    df = pandas.read_excel(filename, sheet_name="DataS1B")
     results = []
     for sample_id in df.columns:
         if hla_type[sample_id] is None:
-- 
GitLab