Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
"""
Generate models report.
"""
import sys
import argparse
from textwrap import wrap
import pypandoc
import pandas
from keras.utils.vis_utils import plot_model
from mhcflurry import __version__
from mhcflurry.downloads import get_path
from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
# Command-line interface. Output options are all optional; only the
# requested artifacts are generated. Defaults point at the standard
# mhcflurry download locations (existence is not checked here).
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument(
    "--cv-summary-csv",
    metavar="FILE.csv",
    default=get_path(
        "cross_validation_class1", "summary.all.csv", test_exists=False),
    help="Cross validation scores summary. Default: %(default)s",
)
parser.add_argument(
    "--class1-models-dir",
    metavar="DIR",
    default=get_path(
        "models_class1", "models", test_exists=False),
    help="Class1 models. Default: %(default)s",
)
parser.add_argument(
    "--out-models-cv-rst",
    metavar="FILE.rst",
    help="rst output file",
)
parser.add_argument(
    "--out-models-info-rst",
    metavar="FILE.rst",
    help="rst output file",
)
parser.add_argument(
    "--out-models-architecture-png",
    metavar="FILE.png",
    help="png output file",
)
parser.add_argument(
    "--out-models-supported-alleles-rst",
    # Fixed copy-paste error: this option writes an rst file, not a png
    # (see its handling in go()), so metavar/help now say rst.
    metavar="FILE.rst",
    help="rst output file",
)
def go(argv):
    """
    Parse *argv* and write each requested report artifact.

    Outputs (all optional, controlled by the CLI flags):
      - supported-alleles rst: peptide-length range and allele list
      - architecture png: diagram of the first neural network
      - info rst: keras layer/parameter summary as an rst literal block
      - cv rst: cross-validation performance table (html -> rst via pandoc)
    """
    args = parser.parse_args(argv)

    # One-element list used as a mutable cache so the (expensive)
    # predictor load happens at most once, shared by the outputs below.
    _predictor_cache = []

    def get_predictor():
        # Lazily load the Class1 predictor on first use.
        if not _predictor_cache:
            _predictor_cache.append(
                Class1AffinityPredictor.load(args.class1_models_dir))
        return _predictor_cache[0]

    if args.out_models_supported_alleles_rst:
        # Supported alleles rst: length range plus wrapped allele list
        # rendered as an rst literal block.
        predictor = get_predictor()
        with open(args.out_models_supported_alleles_rst, "w") as fd:
            fd.write(
                "Models released with the current version of MHCflurry (%s) "
                "support peptides of "
                "length %d-%d and the following %d alleles:\n\n::\n\n\t%s\n\n" % (
                    __version__,
                    predictor.supported_peptide_lengths[0],
                    predictor.supported_peptide_lengths[1],
                    len(predictor.supported_alleles),
                    "\n\t".join(
                        wrap(", ".join(predictor.supported_alleles)))))
        print("Wrote: %s" % args.out_models_supported_alleles_rst)

    if args.out_models_architecture_png:
        # Architecture diagram of the first neural network in the ensemble.
        network = get_predictor().neural_networks[0].network()
        plot_model(
            network,
            to_file=args.out_models_architecture_png,
            show_layer_names=True,
            show_shapes=True)
        print("Wrote: %s" % args.out_models_architecture_png)

    if args.out_models_info_rst:
        # Layer and parameter summary of the first neural network,
        # captured from keras' summary() and indented for rst.
        network = get_predictor().neural_networks[0].network()
        lines = []
        network.summary(print_fn=lines.append)
        with open(args.out_models_info_rst, "w") as fd:
            fd.write("Layers and parameters summary: ")
            fd.write("\n\n::\n\n")
            for line in lines:
                # Leading space makes each line part of the literal block.
                fd.write(" ")
                fd.write(line)
                fd.write("\n")
        print("Wrote: %s" % args.out_models_info_rst)

    if args.out_models_cv_rst:
        # Cross-validation performance table: ensemble rows only,
        # sorted by allele, NaN rows dropped.
        df = pandas.read_csv(args.cv_summary_csv)
        sub_df = df.loc[
            df.kind == "ensemble"
        ].sort_values("allele").dropna().copy().reset_index(drop=True)
        sub_df["Allele"] = sub_df.allele
        sub_df["CV Training Size"] = sub_df.train_size.astype(int)
        sub_df["AUC"] = sub_df.auc
        sub_df["F1"] = sub_df.f1
        sub_df["Kendall Tau"] = sub_df.tau
        # Select the display columns by name (previously relied on the
        # fragile "last 5 columns" position of the assignments above).
        sub_df = sub_df[
            ["Allele", "CV Training Size", "AUC", "F1", "Kendall Tau"]]
        html = sub_df.to_html(
            index=False,
            float_format=lambda v: "%0.3f" % v,
            justify="left")
        # pandoc converts the html table into an rst grid table.
        rst = pypandoc.convert_text(html, format="html", to="rst")
        with open(args.out_models_cv_rst, "w") as fd:
            fd.write(
                "Showing estimated performance for %d alleles." % len(sub_df))
            fd.write("\n\n")
            fd.write(rst)
        print("Wrote: %s" % args.out_models_cv_rst)
if __name__ == "__main__":
go(sys.argv[1:])