Skip to content
Snippets Groups Projects
Commit e6bef3d1 authored by Alex Rubinsteyn
Browse files

added sanity checking to imputation helpers

parent df5ac484
No related branches found
No related tags found
No related merge requests found
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import (
print_function,
division,
absolute_import,
)
import numpy as np import numpy as np
......
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
......
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -18,6 +18,8 @@ from __future__ import ( ...@@ -18,6 +18,8 @@ from __future__ import (
absolute_import, absolute_import,
) )
from collections import defaultdict from collections import defaultdict
import logging
import numpy as np import numpy as np
from fancyimpute.dictionary_helpers import ( from fancyimpute.dictionary_helpers import (
dense_matrix_from_nested_dictionary dense_matrix_from_nested_dictionary
...@@ -34,6 +36,21 @@ from .data import ( ...@@ -34,6 +36,21 @@ from .data import (
create_allele_data_from_peptide_to_ic50_dict, create_allele_data_from_peptide_to_ic50_dict,
) )
def _check_dense_pMHC_array(X, peptide_list, allele_list):
    """
    Sanity check a dense array against its row and column labels.

    Parameters
    ----------
    X : object with a `shape` attribute (e.g. a numpy array)
        Dense array which must be 2-dimensional, with one row per entry
        of `peptide_list` and one column per entry of `allele_list`.

    peptide_list : list
        Row labels; entries must be hashable and unique.

    allele_list : list
        Column labels; entries must be hashable and unique.

    Returns nothing.

    Raises
    ------
    ValueError
        If either label list contains duplicates, if `X` is not
        2-dimensional, or if the shape of `X` does not match the lengths
        of the label lists.
    """
    if len(peptide_list) != len(set(peptide_list)):
        raise ValueError("Duplicate peptides detected in peptide list")
    if len(allele_list) != len(set(allele_list)):
        raise ValueError("Duplicate alleles detected in allele list")

    shape = X.shape
    # Check dimensionality explicitly: unpacking a shape which doesn't have
    # exactly two entries would otherwise fail with an uninformative
    # "values to unpack" error instead of describing the actual problem.
    if len(shape) != 2:
        raise ValueError(
            "Expected 2-dimensional dense array but got shape %s" % (
                shape,))
    n_rows, n_cols = shape

    if n_rows != len(peptide_list):
        raise ValueError(
            "Expected dense array with shape %s to have %d rows" % (
                shape, len(peptide_list)))
    if n_cols != len(allele_list):
        raise ValueError(
            "Expected dense array with shape %s to have %d columns" % (
                shape, len(allele_list)))
def prune_dense_matrix_and_labels( def prune_dense_matrix_and_labels(
X, X,
peptide_list, peptide_list,
...@@ -89,6 +106,7 @@ def prune_dense_matrix_and_labels( ...@@ -89,6 +106,7 @@ def prune_dense_matrix_and_labels(
X = X[:, keep_allele_indices] X = X[:, keep_allele_indices]
observed_mask = observed_mask[:, keep_allele_indices] observed_mask = observed_mask[:, keep_allele_indices]
allele_list = [allele_list[i] for i in keep_allele_indices] allele_list = [allele_list[i] for i in keep_allele_indices]
_check_dense_pMHC_array(X, peptide_list, allele_list)
return X, peptide_list, allele_list return X, peptide_list, allele_list
...@@ -135,6 +153,8 @@ def create_incomplete_dense_pMHC_matrix( ...@@ -135,6 +153,8 @@ def create_incomplete_dense_pMHC_matrix(
X, peptide_list, allele_list = \ X, peptide_list, allele_list = \
dense_matrix_from_nested_dictionary(peptide_to_allele_to_affinity_dict) dense_matrix_from_nested_dictionary(peptide_to_allele_to_affinity_dict)
_check_dense_pMHC_array(X, peptide_list, allele_list)
return prune_dense_matrix_and_labels( return prune_dense_matrix_and_labels(
X, X,
peptide_list, peptide_list,
...@@ -187,6 +207,7 @@ def create_imputed_datasets( ...@@ -187,6 +207,7 @@ def create_imputed_datasets(
# if all entries in the matrix are already filled in then don't # if all entries in the matrix are already filled in then don't
# try using an imputation algorithm since it might raise an # try using an imputation algorithm since it might raise an
# exception. # exception.
logging.warn("No missing values, using original data instead of imputation")
X_complete = X_incomplete X_complete = X_incomplete
else: else:
X_complete = imputer.complete(X_incomplete) X_complete = imputer.complete(X_incomplete)
......
# Copyright (c) 2015. Mount Sinai School of Medicine # Copyright (c) 2016. Mount Sinai School of Medicine
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import (
print_function,
division,
absolute_import,
)
from os.path import join from os.path import join
from appdirs import user_data_dir from appdirs import user_data_dir
......
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import (
print_function,
division,
absolute_import,
)
import itertools import itertools
import logging import logging
......
...@@ -12,6 +12,11 @@ ...@@ -12,6 +12,11 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from __future__ import (
print_function,
division,
absolute_import,
)
from collections import defaultdict from collections import defaultdict
import numpy as np import numpy as np
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment