diff --git a/downloads-generation/data_curated/curate_ms_by_pmid.py b/downloads-generation/data_curated/curate_ms_by_pmid.py index a8f312dc88aef0bd2c3d03e4444f0ad39cbd4ffe..99a269611f9860ef51593801c0043f647e46a29b 100755 --- a/downloads-generation/data_curated/curate_ms_by_pmid.py +++ b/downloads-generation/data_curated/curate_ms_by_pmid.py @@ -202,7 +202,7 @@ def handle_pmid_25576301(filename): result_df["format"] = "multiallelic" allele_map = { - 'Fib': "HLA-A*03:01 HLA-A*23:01 HLA-B*08:01 HLA-B*15:18 HLA-C*07:02 HLA-C*07:04", + 'Fib': "HLA-A*03:01 HLA-A*23:01 HLA-B*08:01 HLA-B*15:18 HLA-C*07:02 HLA-C*07:04", 'HCC1937': "HLA-A*23:01 HLA-A*24:02 HLA-B*07:02 HLA-B*40:01 HLA-C*03:04 HLA-C*07:02", 'SupB15WT': None, # four digit alleles unknown, will drop sample 'SupB15RT': None, @@ -1114,6 +1114,7 @@ def run(): ms_df = pandas.concat(ms_dfs, ignore_index=True, sort=False) ms_df["cell_line"] = ms_df["cell_line"].fillna("") + ms_df["hla"] = ms_df["hla"].str.strip().str.replace(r'\s+', ' ') sample_table = ms_df[ ["sample_id", "pmid", "expression_dataset", "cell_line", "sample_type"] diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml index 9af7375631a75883bb13909665e1b4db215e567a..16a4d83e5228119c5ae9078d02c5201f38709a9a 100644 --- a/mhcflurry/downloads.yml +++ b/mhcflurry/downloads.yml @@ -58,7 +58,7 @@ releases: default: false - name: data_mass_spec_annotated - url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_annotated.20191220.tar.bz2 + url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_annotated.20191226.tar.bz2 default: false - name: data_references