diff --git a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py index e6ed769b917b119d48b859626f43e6a97c02cb5c..8eeb45f208721a1a55267f8279a38b0f3e203e4f 100644 --- a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py +++ b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py @@ -106,7 +106,11 @@ def run(argv=sys.argv[1:]): alleles = [normalize_allele_name(a) for a in args.allele] alleles = sorted(set(alleles)) - peptides = pandas.read_csv(args.input_peptides).peptide.unique() + peptides = pandas.read_csv(args.input_peptides).peptide.drop_duplicates() + print("Filtering to valid peptides. Starting at: ", len(peptides)) + peptides = peptides[peptides.str.match("[ACDEFGHIKLMNPQRSTVWY]+")] + print("Filtered to: ", len(peptides)) + peptides = peptides.unique() num_peptides = len(peptides) print("Predictions for %d alleles x %d peptides." % ( @@ -137,7 +141,7 @@ def run(argv=sys.argv[1:]): GLOBAL_DATA["args"] = { 'verbose': args.verbosity > 0, 'model_kwargs': { - 'batch_size': args.prediction_batch_size, + 'batch_size': args.batch_size, } } @@ -239,7 +243,6 @@ def predict_for_allele( predictions = predictor.predict( peptides=chunk_peptides, allele=allele, - verbose=verbose, throw=False, model_kwargs=model_kwargs).astype('float32') if verbose: