From 300ad9400705bdd77464dff82537d845185d0ea6 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Wed, 25 Sep 2019 12:22:41 -0400 Subject: [PATCH] fix --- downloads-generation/data_curated/README.md | 5 +++-- downloads-generation/data_curated/curate_by_pmid.py | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/downloads-generation/data_curated/README.md b/downloads-generation/data_curated/README.md index 7ac75d20..25227bee 100644 --- a/downloads-generation/data_curated/README.md +++ b/downloads-generation/data_curated/README.md @@ -1,9 +1,10 @@ # Combined training data -This download contains the data used to train the production class1 MHCflurry models. This data is derived from a recent [IEDB](http://www.iedb.org/home_v3.php) export as well as the data from [Kim 2014](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241). +This download contains the data used to train the production class1 MHCflurry models. This data is derived from a recent [IEDB](http://www.iedb.org/home_v3.php) export, the data from [Kim 2014](http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241), and a number of other sources. 
To generate this download run: ``` +pip install -r requirements.txt # for the first time you generate this download ./GENERATE.sh -``` \ No newline at end of file +``` diff --git a/downloads-generation/data_curated/curate_by_pmid.py b/downloads-generation/data_curated/curate_by_pmid.py index 4d478b9d..5d62f62b 100755 --- a/downloads-generation/data_curated/curate_by_pmid.py +++ b/downloads-generation/data_curated/curate_by_pmid.py @@ -229,7 +229,7 @@ def handle_pmid_26992070(*filenames): for num in ["1", "2"]: allele_info[ "HLA-%s %s" % (gene, num) - ] = "HLA-" + gene + allele_info["HLA-%s %s" % (gene, num)] + ] = "HLA-" + gene + "*" + allele_info["HLA-%s %s" % (gene, num)] cell_line_to_allele = allele_info.apply(" ".join, axis=1) sheets = {} @@ -585,7 +585,7 @@ def handle_pmid_31495665(filename): "MAPTAC_DRB3*01:01_dm-": "mixed", } - df = pandas.read_excel(filename, sheetname="DataS1B") + df = pandas.read_excel(filename, sheet_name="DataS1B") results = [] for sample_id in df.columns: if hla_type[sample_id] is None: -- GitLab