From 7b6c4dcf04a0fb0486660f627d1a2b4b0f2b8f23 Mon Sep 17 00:00:00 2001 From: Alex Rubinsteyn <alex.rubinsteyn@gmail.com> Date: Tue, 3 May 2016 09:35:54 -0400 Subject: [PATCH] added unit tests for load_allele_datasets --- mhcflurry/data.py | 3 ++- test/data_10mer.csv | 2 ++ test/data_8mer.csv | 2 ++ test/data_9mer.csv | 2 ++ test/test_load_allele_datasets.py | 41 +++++++++++++++++++++++++++++++ 5 files changed, 49 insertions(+), 1 deletion(-) create mode 100644 test/data_10mer.csv create mode 100644 test/data_8mer.csv create mode 100644 test/data_9mer.csv create mode 100644 test/test_load_allele_datasets.py diff --git a/mhcflurry/data.py b/mhcflurry/data.py index fdb176f1..1ce28b2f 100644 --- a/mhcflurry/data.py +++ b/mhcflurry/data.py @@ -337,7 +337,8 @@ def load_allele_datasets( peptide_column_name=None, peptide_length_column_name="peptide_length", ic50_column_name="meas", - only_human=False): + only_human=False, + shuffle=True): """ Loads an IEDB dataset, extracts "hot-shot" encoding of fixed length peptides and log-transforms the IC50 measurement. Returns dictionary mapping allele diff --git a/test/data_10mer.csv b/test/data_10mer.csv new file mode 100644 index 00000000..e24afab5 --- /dev/null +++ b/test/data_10mer.csv @@ -0,0 +1,2 @@ +peptide,mhc,meas +AAAAAAAAAA,HLA-A0201,400 \ No newline at end of file diff --git a/test/data_8mer.csv b/test/data_8mer.csv new file mode 100644 index 00000000..7fbbcb5a --- /dev/null +++ b/test/data_8mer.csv @@ -0,0 +1,2 @@ +peptide,mhc,meas +AAAAAAAA,HLA-A0201,400 \ No newline at end of file diff --git a/test/data_9mer.csv b/test/data_9mer.csv new file mode 100644 index 00000000..9953dd40 --- /dev/null +++ b/test/data_9mer.csv @@ -0,0 +1,2 @@ +peptide,mhc,meas +AAAAAAAAA,HLA-A0201,400 \ No newline at end of file diff --git a/test/test_load_allele_datasets.py b/test/test_load_allele_datasets.py new file mode 100644 index 00000000..0c64bc8a --- /dev/null +++ b/test/test_load_allele_datasets.py @@ -0,0 +1,41 @@ +from mhcflurry.data import load_allele_datasets + +def test_load_allele_datasets_8mer(): + allele_dict = load_allele_datasets("data_8mer.csv") + assert len(allele_dict) == 1 + assert set(allele_dict.keys()) == {"HLA-A0201"} + dataset = allele_dict["HLA-A0201"] + print(dataset) + assert len(set(dataset.original_peptides)) == 1 + assert len(dataset.original_peptides) == 9 + assert len(dataset.peptides) == 9 + assert len(dataset.original_peptides[0]) == 8 + assert len(dataset.peptides[0]) == 9 + +def test_load_allele_datasets_9mer(): + allele_dict = load_allele_datasets("data_9mer.csv") + assert len(allele_dict) == 1 + assert set(allele_dict.keys()) == {"HLA-A0201"} + dataset = allele_dict["HLA-A0201"] + print(dataset) + assert len(dataset.original_peptides) == 1 + assert len(dataset.peptides) == 1 + assert len(dataset.original_peptides[0]) == 9 + assert dataset.original_peptides[0] == dataset.peptides[0] + +def test_load_allele_datasets_10mer(): + allele_dict = load_allele_datasets("data_10mer.csv") + assert len(allele_dict) == 1 + assert set(allele_dict.keys()) == {"HLA-A0201"} + dataset = allele_dict["HLA-A0201"] + print(dataset) + assert len(set(dataset.original_peptides)) == 1 + assert len(dataset.peptides) == 10, len(dataset.peptides) + assert len(dataset.original_peptides) == 10 + assert len(dataset.original_peptides[0]) == 10 + assert len(dataset.peptides[0]) == 9 + +if __name__ == "__main__": + test_load_allele_datasets_8mer() + test_load_allele_datasets_9mer() + test_load_allele_datasets_10mer() -- GitLab