diff --git a/mhcflurry/package_metadata.py b/mhcflurry/package_metadata.py
index ebb027f3d3b96d218831e3b466215e0f0f60005b..b61541487b806f901562668b39423482dc38a3b7 100644
--- a/mhcflurry/package_metadata.py
+++ b/mhcflurry/package_metadata.py
@@ -1,2 +1,2 @@
-__version__ = "0.0.6"
+__version__ = "0.0.7"
 
diff --git a/script/create-combined-class1-dataset.py b/script/create-combined-class1-dataset.py
index fbfad25aafc6e9a9811e7767b0dd527b93388fd5..b75fbca603294db6c134d61288f84cf7da36d312 100755
--- a/script/create-combined-class1-dataset.py
+++ b/script/create-combined-class1-dataset.py
@@ -168,6 +168,15 @@ if __name__ == "__main__":
     combined_df = pd.DataFrame(
         combined_columns,
         columns=["species", "mhc", "peptide", "peptide_length", "meas"])
+
+    # filter out peptides with post-translational modifications, marked by a
+    # literal "+" (plain-text match); unknown residues are not handled here
+    modified_peptide_mask = combined_df.peptide.str.contains("+", regex=False)
+    n_modified = modified_peptide_mask.sum()
+    if n_modified > 0:
+        print("Dropping %d modified peptides" % n_modified)
+        combined_df = combined_df[~modified_peptide_mask]
+
     print("New entry allele distribution")
     for (allele, count) in new_allele_counts.most_common():
         print("%s: %d" % (allele, count))