From 5618cf96060c91d3121d7a420235333d122e47bc Mon Sep 17 00:00:00 2001
From: Alex Rubinsteyn <alex.rubinsteyn@gmail.com>
Date: Tue, 2 Feb 2016 19:00:39 -0500
Subject: [PATCH] Fix hyperparameter-search results not being split by allele

---
 .../matrix-completion-hyperparameter-search.py      | 13 +++++++------
 scripts/create-combined-class1-dataset.py           |  4 ++--
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/experiments/matrix-completion-hyperparameter-search.py b/experiments/matrix-completion-hyperparameter-search.py
index eaefef90..7fb41891 100755
--- a/experiments/matrix-completion-hyperparameter-search.py
+++ b/experiments/matrix-completion-hyperparameter-search.py
@@ -97,13 +97,13 @@ parser.add_argument(
 
 parser.add_argument(
     "--second-hidden-layer-sizes",
-    default=[0, 50],
+    default=[0],
     type=parse_int_list)
 
 
 parser.add_argument(
     "--dropouts",
-    default=[0.0, 0.25],
+    default=[0.0],
     type=parse_float_list)
 
 parser.add_argument(
@@ -194,6 +194,7 @@ if __name__ == "__main__":
 
     scores = ScoreSet(
         index=[
+            "allele",
             "dropout_probability",
             "embedding_dim_size",
             "hidden_layer_size1",
@@ -250,8 +251,8 @@ if __name__ == "__main__":
                             embedding_input_dim=21 if args.unknown_amino_acids else 20,
                         )
                         predictors[key] = predictor
-                        initial_weights[key] = predictor.model.get_weights()
-                        initial_optimizer_states[key] = predictor.model.optimizer.get_state()
+                        initial_weights[key] = predictor.model.get_weights().copy()
+                        initial_optimizer_states[key] = predictor.model.optimizer.get_state().copy()
 
     # want at least 5 samples in each fold of CV
     # to make meaningful estimates of accuracy
@@ -368,7 +369,7 @@ if __name__ == "__main__":
             training_sample_weights = 1.0 / np.array(training_counts)
             Y_train = np.array([
                 train_dict[p] for p in training_row_peptides])
-            for key, predictor in predictors.items():
+            for key, predictor in sorted(predictors.items()):
 
                 print("\n-----")
                 print(
@@ -400,7 +401,7 @@ if __name__ == "__main__":
                     y_pred=y_pred,
                     max_ic50=args.max_ic50)
                 scores.add_many(
-                    key,
+                    ("%s," % allele) + key,
                     mae=mae,
                     tau=tau,
                     f1_score=f1_score,
diff --git a/scripts/create-combined-class1-dataset.py b/scripts/create-combined-class1-dataset.py
index 486e8fe3..094b6f5b 100755
--- a/scripts/create-combined-class1-dataset.py
+++ b/scripts/create-combined-class1-dataset.py
@@ -137,7 +137,7 @@ if __name__ == "__main__":
     for (allele, count) in new_allele_counts.most_common():
         print("%s: %d" % (allele, count))
     print("Combined DataFrame size: %d (+%d)" % (
-            len(combined_df),
-            len(combined_df) - len(nielsen_data)))
+        len(combined_df),
+        len(combined_df) - len(nielsen_data)))
     print("Writing %s..." % args.output_csv_path)
     combined_df.to_csv(args.output_csv_path, index=False)
-- 
GitLab