diff --git a/mhcflurry/data.py b/mhcflurry/data.py
index fdb176f17efd9d4a288737a763ab3cf55ea4cf89..1ce28b2f8d3c52ec22db1161c3f638a04ba76af0 100644
--- a/mhcflurry/data.py
+++ b/mhcflurry/data.py
@@ -337,7 +337,8 @@ def load_allele_datasets(
         peptide_column_name=None,
         peptide_length_column_name="peptide_length",
         ic50_column_name="meas",
-        only_human=False):
+        only_human=False,
+        shuffle=True):
     """
     Loads an IEDB dataset, extracts "hot-shot" encoding of fixed length peptides
     and log-transforms the IC50 measurement. Returns dictionary mapping allele
diff --git a/test/data_10mer.csv b/test/data_10mer.csv
new file mode 100644
index 0000000000000000000000000000000000000000..e24afab51d0b9078a89187a8df3578c21c7610b1
--- /dev/null
+++ b/test/data_10mer.csv
@@ -0,0 +1,2 @@
+peptide,mhc,meas
+AAAAAAAAAA,HLA-A0201,400
\ No newline at end of file
diff --git a/test/data_8mer.csv b/test/data_8mer.csv
new file mode 100644
index 0000000000000000000000000000000000000000..7fbbcb5a33747145b683d899c0dc2b9fabfac175
--- /dev/null
+++ b/test/data_8mer.csv
@@ -0,0 +1,2 @@
+peptide,mhc,meas
+AAAAAAAA,HLA-A0201,400
\ No newline at end of file
diff --git a/test/data_9mer.csv b/test/data_9mer.csv
new file mode 100644
index 0000000000000000000000000000000000000000..9953dd40bba9d6555d2f4baadbf4b6834a3503ff
--- /dev/null
+++ b/test/data_9mer.csv
@@ -0,0 +1,2 @@
+peptide,mhc,meas
+AAAAAAAAA,HLA-A0201,400
\ No newline at end of file
diff --git a/test/test_load_allele_datasets.py b/test/test_load_allele_datasets.py
new file mode 100644
index 0000000000000000000000000000000000000000..0c64bc8a6e0db2cbbddf3d84682b4761125707ff
--- /dev/null
+++ b/test/test_load_allele_datasets.py
@@ -0,0 +1,62 @@
+"""
+Tests for load_allele_datasets using single-row CSV fixtures that hold an
+8mer, a 9mer and a 10mer peptide for allele HLA-A0201.
+"""
+import os
+
+from mhcflurry.data import load_allele_datasets
+
+# The fixture CSVs sit next to this file. Resolving them against __file__
+# (instead of the current working directory) lets the tests pass no matter
+# which directory the test runner is started from.
+DATA_DIR = os.path.dirname(os.path.abspath(__file__))
+
+
+def load_single_allele_dataset(filename):
+    """
+    Loads the named fixture CSV and returns the dataset of HLA-A0201, which
+    must be the only allele in the resulting dictionary.
+    """
+    allele_dict = load_allele_datasets(os.path.join(DATA_DIR, filename))
+    assert len(allele_dict) == 1
+    assert set(allele_dict.keys()) == {"HLA-A0201"}
+    dataset = allele_dict["HLA-A0201"]
+    print(dataset)
+    return dataset
+
+
+def test_load_allele_datasets_8mer():
+    # one 8mer row is expected to come back as nine entries of length 9,
+    # all of which keep the same original 8mer
+    dataset = load_single_allele_dataset("data_8mer.csv")
+    assert len(set(dataset.original_peptides)) == 1
+    assert len(dataset.original_peptides) == 9
+    assert len(dataset.peptides) == 9
+    assert len(dataset.original_peptides[0]) == 8
+    assert len(dataset.peptides[0]) == 9
+
+
+def test_load_allele_datasets_9mer():
+    # one 9mer row is expected to come back as a single, unchanged entry
+    dataset = load_single_allele_dataset("data_9mer.csv")
+    assert len(dataset.original_peptides) == 1
+    assert len(dataset.peptides) == 1
+    assert len(dataset.original_peptides[0]) == 9
+    assert dataset.original_peptides[0] == dataset.peptides[0]
+
+
+def test_load_allele_datasets_10mer():
+    # one 10mer row is expected to come back as ten entries of length 9,
+    # all of which keep the same original 10mer
+    dataset = load_single_allele_dataset("data_10mer.csv")
+    assert len(set(dataset.original_peptides)) == 1
+    assert len(dataset.peptides) == 10, len(dataset.peptides)
+    assert len(dataset.original_peptides) == 10
+    assert len(dataset.original_peptides[0]) == 10
+    assert len(dataset.peptides[0]) == 9
+
+
+if __name__ == "__main__":
+    test_load_allele_datasets_8mer()
+    test_load_allele_datasets_9mer()
+    test_load_allele_datasets_10mer()