diff --git a/downloads-generation/models_class1_pan/GENERATE.sh b/downloads-generation/models_class1_pan/GENERATE.sh index 112c03b723bdaf56ba68faba9acf2803264b0c85..8e9ce2089a3dee33d313bc030b220b01688fca28 100755 --- a/downloads-generation/models_class1_pan/GENERATE.sh +++ b/downloads-generation/models_class1_pan/GENERATE.sh @@ -114,7 +114,7 @@ do cp "$MODELS_DIR/train_data.csv.bz2" "models.${kind}/train_data.csv.bz2" # We are now calibrating all alleles. - # Previously had argument: --allele $ALLELE_LIST \ + # Previously had argument: --allele $ALLELE_LIST time mhcflurry-calibrate-percentile-ranks \ --models-dir models.${kind} \ --match-amino-acid-distribution-data "$MODELS_DIR/train_data.csv.bz2" \ diff --git a/mhcflurry/class1_presentation_predictor.py b/mhcflurry/class1_presentation_predictor.py index d95ca0c9637c871ea8a8fc62727d8a640cddb01b..69a531c61faf5df8a6d6cc574af8d0ca226b8221 100644 --- a/mhcflurry/class1_presentation_predictor.py +++ b/mhcflurry/class1_presentation_predictor.py @@ -217,6 +217,7 @@ class Class1PresentationPredictor(object): model.intercept_ = row.intercept model.coef_ = numpy.expand_dims( row[self.model_inputs].values, axis=0) + model.classes_ = numpy.array([0, 1]) else: model = self._models_cache[name] return model diff --git a/mhcflurry/select_pan_allele_models_command.py b/mhcflurry/select_pan_allele_models_command.py index 92a0170238277e681ce00fa4f26d61d503e31c46..c8d5132af513218787a8aa34882a4c5ab57722a5 100644 --- a/mhcflurry/select_pan_allele_models_command.py +++ b/mhcflurry/select_pan_allele_models_command.py @@ -258,10 +258,7 @@ def run(argv=sys.argv[1:]): pprint(result) fold_num = result['fold_num'] (all_models_for_fold, _) = folds_to_predictors[fold_num] - models = [ - all_models_for_fold[i] - for i in result['selected_indices'] - ] + models = result['selected_models'] summary_df = result['summary'].copy() summary_df.index = summary_df.index.map( lambda idx: all_models_for_fold[idx]) @@ -271,7 +268,6 @@ def run(argv=sys.argv[1:]): len(models), fold_num, result['selected_indices'])) models_by_fold[fold_num] = models for model in models: - model.clear_allele_representations() result_predictor.add_pan_allele_model(model) summary_df = pandas.concat(summary_dfs, ignore_index=False) @@ -363,6 +359,9 @@ def model_select( break assert selected + selected_models = [models[i] for i in selected] + for model in selected_models: + model.clear_allele_representations() summary_df = pandas.Series(individual_model_scores)[ numpy.arange(len(models)) @@ -372,6 +371,7 @@ def model_select( return { 'fold_num': fold_num, 'selected_indices': selected, + 'selected_models': selected_models, 'summary': summary_df, # indexed by model index }