Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import numpy
from copy import copy
from .allele_encoding import AlleleEncoding
class MultipleAlleleEncoding(object):
    """
    Allele encoding for experiments that each involve several alleles.

    Every experiment occupies a fixed-width group of
    ``max_alleles_per_experiment`` entries, right-padded with ``None``. The
    groups are stored end to end in a single flat ``AlleleEncoding``
    (``self.allele_encoding``); the ``indices`` and ``alleles`` properties
    reshape that flat data into a matrix with one row per experiment.
    """

    def __init__(
            self,
            experiment_names=None,
            experiment_to_allele_list=None,
            max_alleles_per_experiment=6,
            allele_to_sequence=None,
            borrow_from=None):
        """
        Parameters
        ----------
        experiment_names : sequence, optional
            One entry per row of the encoding. Every entry must be a key of
            ``experiment_to_allele_list``. Defaults to no experiments.
        experiment_to_allele_list : dict, optional
            Maps experiment name to a non-empty sequence of at most
            ``max_alleles_per_experiment`` alleles. Defaults to an empty
            mapping.
        max_alleles_per_experiment : int
            Width of each experiment's row; shorter allele lists are padded
            with ``None``.
        allele_to_sequence : optional
            Forwarded unchanged to ``AlleleEncoding``.
        borrow_from : optional
            Forwarded unchanged to ``AlleleEncoding``.

        Raises
        ------
        AssertionError
            If an experiment has no alleles or more than
            ``max_alleles_per_experiment`` alleles.
        TypeError
            If an experiment's alleles are given as a single string rather
            than a sequence of alleles.
        KeyError
            If an entry of ``experiment_names`` is missing from
            ``experiment_to_allele_list``.
        """
        # None sentinels instead of shared mutable default objects.
        if experiment_names is None:
            experiment_names = []
        if experiment_to_allele_list is None:
            experiment_to_allele_list = {}

        padded_experiment_to_allele_list = {}
        for (name, alleles) in experiment_to_allele_list.items():
            if isinstance(alleles, str):
                # A bare string would otherwise be split into characters by
                # list() below; reject it as the list concatenation used to.
                raise TypeError(
                    "Alleles for experiment %r must be a sequence of "
                    "alleles, not a single string" % (name,))
            # Copy into a list so tuples and other sequences can be padded.
            alleles = list(alleles)
            assert len(alleles) > 0, (
                "Experiment %r has no alleles" % (name,))
            assert len(alleles) <= max_alleles_per_experiment, (
                "Experiment %r has %d alleles; the maximum is %d" % (
                    name, len(alleles), max_alleles_per_experiment))
            padding = [None] * (max_alleles_per_experiment - len(alleles))
            padded_experiment_to_allele_list[name] = alleles + padding

        # Lay the padded rows end to end, in the order experiments are named.
        flattened_allele_list = []
        for name in experiment_names:
            flattened_allele_list.extend(
                padded_experiment_to_allele_list[name])

        self.allele_encoding = AlleleEncoding(
            alleles=flattened_allele_list,
            allele_to_sequence=allele_to_sequence,
            borrow_from=borrow_from
        )
        self.max_alleles_per_experiment = max_alleles_per_experiment
        self.experiment_names = numpy.array(experiment_names)

    def append_alleles(self, alleles):
        """
        Append one new single-allele row per given allele.

        Each allele becomes its own row: the allele followed by
        ``max_alleles_per_experiment - 1`` ``None`` entries. The appended rows
        get ``None`` as their experiment name. ``self.allele_encoding`` is
        replaced by a new ``AlleleEncoding`` built with ``borrow_from`` set to
        the previous one.

        Parameters
        ----------
        alleles : iterable
            Alleles to append, one row each.
        """
        # Materialise once: the input is iterated and then measured.
        alleles = list(alleles)
        row_padding = [None] * (self.max_alleles_per_experiment - 1)

        extended_alleles = list(self.allele_encoding.alleles)
        for allele in alleles:
            extended_alleles.append(allele)
            extended_alleles.extend(row_padding)

        # The flat list must still divide evenly into full rows.
        assert len(extended_alleles) % self.max_alleles_per_experiment == 0, (
            len(extended_alleles))

        self.allele_encoding = AlleleEncoding(
            alleles=extended_alleles,
            borrow_from=self.allele_encoding)

        self.experiment_names = numpy.concatenate([
            self.experiment_names,
            numpy.tile(None, len(alleles))
        ])

    @property
    def indices(self):
        """
        The underlying encoding's ``indices`` values reshaped to
        (num rows, ``max_alleles_per_experiment``).
        """
        return self.allele_encoding.indices.values.reshape(
            (-1, self.max_alleles_per_experiment))

    @property
    def alleles(self):
        """
        The underlying encoding's ``alleles`` values reshaped to
        (num rows, ``max_alleles_per_experiment``).
        """
        return numpy.reshape(
            self.allele_encoding.alleles.values,
            (-1, self.max_alleles_per_experiment))

    def compact(self):
        """
        Return a shallow copy of this object whose ``allele_encoding`` is the
        result of ``self.allele_encoding.compact()``. This object itself is
        left unchanged.
        """
        result = copy(self)
        result.allele_encoding = self.allele_encoding.compact()
        return result

    def allele_representations(self, encoding_name):
        """
        Delegate to ``self.allele_encoding.allele_representations``.

        Parameters
        ----------
        encoding_name : passed through to the underlying encoding
        """
        return self.allele_encoding.allele_representations(encoding_name)

    @property
    def allele_to_sequence(self):
        """The underlying encoding's ``allele_to_sequence``."""
        return self.allele_encoding.allele_to_sequence

    def fixed_length_vector_encoded_sequences(self, encoding_name):
        """Not supported for this class; always raises NotImplementedError."""
        raise NotImplementedError()

    def shuffle_in_place(self, shuffle_permutation=None):
        """
        Reorder the rows of this encoding in place.

        The same permutation is applied to the rows of the allele matrix and
        to ``experiment_names``. ``self.allele_encoding`` is replaced by a new
        ``AlleleEncoding`` built from the reordered alleles with
        ``borrow_from`` set to the previous one.

        Parameters
        ----------
        shuffle_permutation : sequence of int, optional
            Row indices giving the new order. If not given, a random
            permutation of all rows is drawn with
            ``numpy.random.permutation``.
        """
        alleles_matrix = self.alleles
        if shuffle_permutation is None:
            shuffle_permutation = numpy.random.permutation(
                len(alleles_matrix))
        self.allele_encoding = AlleleEncoding(
            alleles=alleles_matrix[shuffle_permutation].flatten(),
            borrow_from=self.allele_encoding
        )
        self.experiment_names = self.experiment_names[shuffle_permutation]