From 1826aae687ab7fd150c7beb70143193bafa04408 Mon Sep 17 00:00:00 2001
From: Tim O'Donnell <timodonnell@gmail.com>
Date: Wed, 1 Jan 2020 16:24:45 -0500
Subject: [PATCH] Start 1.6.0

---
 downloads-generation/data_curated/GENERATE.sh |  10 +-
 downloads-generation/data_curated/curate.py   |  27 +++--
 mhcflurry/downloads.yml                       | 102 +++++++++++++++++-
 3 files changed, 127 insertions(+), 12 deletions(-)

diff --git a/downloads-generation/data_curated/GENERATE.sh b/downloads-generation/data_curated/GENERATE.sh
index 2c404381..6f9a515f 100755
--- a/downloads-generation/data_curated/GENERATE.sh
+++ b/downloads-generation/data_curated/GENERATE.sh
@@ -57,7 +57,6 @@ bzip2 rna_expression.csv
 
 rm -rf ms
 
-# With mass-spec data
 time python curate.py \
     --data-iedb \
         "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
@@ -70,6 +69,15 @@ time python curate.py \
     --out-affinity-csv curated_training_data.affinity.csv \
     --out-mass-spec-csv curated_training_data.mass_spec.csv
 
+time python curate.py \
+    --data-iedb \
+        "$(mhcflurry-downloads path data_iedb)/mhc_ligand_full.csv.bz2" \
+    --data-kim2014 \
+        "$(mhcflurry-downloads path data_published)/bdata.20130222.mhci.public.1.txt" \
+    --data-systemhc-atlas \
+        "$(mhcflurry-downloads path data_systemhcatlas)/data.csv.bz2" \
+    --out-csv curated_training_data.no_additional_ms.csv
+
 for i in $(ls *.csv)
 do
     bzip2 $i
diff --git a/downloads-generation/data_curated/curate.py b/downloads-generation/data_curated/curate.py
index e14680b6..9c56c2f7 100755
--- a/downloads-generation/data_curated/curate.py
+++ b/downloads-generation/data_curated/curate.py
@@ -252,6 +252,7 @@ def load_data_additional_ms(filename):
     df["measurement_type"] = "qualitative"
     df["measurement_kind"] = "mass_spec"
     df["measurement_source"] = "MS:pmid:" + df["original_pmid"].map(str)
+    df["original_allele"] = ""
     return df
 
 
@@ -311,18 +312,26 @@ def run():
     print("Measurement kind:")
     print(df.measurement_kind.value_counts())
 
-    df.to_csv(args.out_csv, index=False)
-    print("Wrote: %s" % os.path.abspath(args.out_csv))
+    print("Measurement source / kind:")
+    print(
+        df.groupby(
+            ["measurement_source", "measurement_kind"]
+        ).peptide.count().sort_values())
 
-    if args.out_affinity_csv:
-        df.loc[df.measurement_kind == "affinity"].to_csv(
-            args.out_affinity_csv, index=False)
-        print("Wrote: %s" % os.path.abspath(args.out_affinity_csv))
+    def write(write_df, filename):
+        filename = os.path.abspath(filename)
+        write_df.to_csv(filename, index=False)
+        print("Wrote [%d lines]: %s" % (len(write_df), filename))
 
+    write(df, args.out_csv)
+    if args.out_affinity_csv:
+        write(
+            df.loc[df.measurement_kind == "affinity"],
+            args.out_affinity_csv)
     if args.out_mass_spec_csv:
-        df.loc[df.measurement_kind == "mass_spec"].to_csv(
-            args.out_mass_spec_csv, index=False)
-        print("Wrote: %s" % os.path.abspath(args.out_mass_spec_csv))
+        write(
+            df.loc[df.measurement_kind == "mass_spec"],
+            args.out_mass_spec_csv)
 
 
 if __name__ == '__main__':
diff --git a/mhcflurry/downloads.yml b/mhcflurry/downloads.yml
index 940e0a40..a9eb33bd 100644
--- a/mhcflurry/downloads.yml
+++ b/mhcflurry/downloads.yml
@@ -8,7 +8,7 @@
 # by name, the downloads with "default=true" are downloaded.
 
 # This should usually be the latest release.
-current-release: 1.5.0
+current-release: 1.6.0
 
 # An integer indicating what models the current MHCflurry code base is compatible
 # with. Increment this integer when changes are made to MHCflurry that would break
@@ -17,6 +17,104 @@ current-compatibility-version: 2
 
 # Add new releases here as they are made.
 releases:
+    1.6.0:
+        compatibility-version: 2
+        downloads:
+            - name: models_class1_pan
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan.20200101.tar.bz2
+              default: false
+
+            - name: models_class1_pan_unselected
+              part_urls:
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_unselected.20200101.tar.bz2.part.aa
+              default: false
+
+            - name: models_class1_pan_refined
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_refined.20191212c.tar.bz2
+              default: false
+
+            - name: models_class1_pan_variants
+              part_urls:
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_variants.20200101.tar.bz2.part.aa
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_variants.20200101.tar.bz2.part.ab
+              default: false
+
+            - name: data_mass_spec_benchmark
+              part_urls:
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.aa
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ab
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ac
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ad
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ae
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.af
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ag
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ah
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ai
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.aj
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.ak
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.al
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.am
+                - https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_benchmark.20191225.tar.bz2.part.an
+              default: false
+
+            - name: data_mass_spec_annotated
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_mass_spec_annotated.20191226.tar.bz2
+              default: false
+
+            - name: data_references
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_references.20190927.tar.bz2
+              default: false
+
+            - name: data_iedb
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_iedb.20191220.tar.bz2
+              default: false
+
+            - name: data_systemhcatlas
+              url: https://github.com/openvax/mhcflurry/releases/download/pan-dev1/data_systemhcatlas.20190506.tar.bz2
+              default: false
+
+            - name: allele_sequences
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/allele_sequences.20191231.tar.bz2
+              default: false
+
+            - name: random_peptide_predictions
+              url: https://github.com/openvax/mhcflurry/releases/download/pan-dev1/random_peptide_predictions.20190506.tar.bz2
+              default: false
+
+            - name: data_published
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_published.20191220.tar.bz2
+              default: false
+
+            - name: data_curated
+              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_curated.20191226.tar.bz2
+              default: true
+
+            # Older downloads
+            - name: models_class1
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2/models_class1.20180225.tar.bz2
+              default: true
+
+            - name: models_class1_selected_no_mass_spec
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2/models_class1_selected_no_mass_spec.20180225.tar.bz2
+              default: false
+
+            - name: models_class1_unselected
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2/models_class1_unselected.20180221.tar.bz2
+              default: false
+
+            - name: models_class1_trained_with_mass_spec
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2.1/models_class1_trained_with_mass_spec.20180228.tar.bz2
+              default: false
+
+            - name: models_class1_unselected_with_mass_spec
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2.1/models_class1_unselected_with_mass_spec.20180227.tar.bz2
+              default: false
+
+            - name: models_class1_minimal
+              url: https://github.com/openvax/mhcflurry/releases/download/pre-1.2/models_class1_minimal.20180226.tar.bz2
+              default: false
+
+
     1.5.0:
         compatibility-version: 2
         downloads:
@@ -74,7 +172,7 @@ releases:
               default: false
 
             - name: allele_sequences
-              url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/allele_sequences.20191231.tar.bz2
+              url: http://github.com/openvax/mhcflurry/releases/download/pan-dev1/allele_sequences.20190506.tar.bz2
               default: false
 
             - name: random_peptide_predictions
-- 
GitLab