"""
Generate certain RST files used in documentation.
"""
import sys
import argparse
from textwrap import wrap
import pypandoc
import pandas
from keras.utils.vis_utils import plot_model
from mhcflurry import __version__
from mhcflurry.downloads import get_path
from mhcflurry.class1_affinity_predictor import Class1AffinityPredictor
parser = argparse.ArgumentParser(usage=__doc__)
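# usage=__doc__ surfaces the module docstring as the --help usage text.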
parser.add_argument(
    "--cv-summary-csv",
    metavar="FILE.csv",
    default=get_path(
        "cross_validation_class1", "summary.all.csv", test_exists=False),
    help="Cross validation scores summary. Default: %(default)s",
)
parser.add_argument(
    "--class1-models-dir",
    metavar="DIR",
    default=get_path(
        "models_class1", "models", test_exists=False),
    help="Class1 models. Default: %(default)s",
)
parser.add_argument(
    "--out-models-cv-rst",
    metavar="FILE.rst",
    help="rst output file",
)
parser.add_argument(
    "--out-models-info-rst",
    metavar="FILE.rst",
    help="rst output file",
)
parser.add_argument(
    "--out-models-architecture-png",
    metavar="FILE.png",
    help="png output file",
)
parser.add_argument(
    "--out-models-supported-alleles-rst",
    metavar="FILE.rst",
    help="rst output file",
)
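
# The predictor is loaded lazily below: it is read from disk only if at least
# one requested output actually needs it.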
def go(argv):
    args = parser.parse_args(argv)

    predictor = None

    if args.out_models_supported_alleles_rst:
        # Supported alleles rst
        if predictor is None:
            predictor = Class1AffinityPredictor.load(args.class1_models_dir)
        with open(args.out_models_supported_alleles_rst, "w") as fd:
            fd.write(
                "Models released with the current version of MHCflurry (%s) "
                "support peptides of "
                "length %d-%d and the following %d alleles:\n\n::\n\n\t%s\n\n" % (
                    __version__,
                    predictor.supported_peptide_lengths[0],
                    predictor.supported_peptide_lengths[1],
                    len(predictor.supported_alleles),
                    "\n\t".join(
                        wrap(", ".join(predictor.supported_alleles)))))
        print("Wrote: %s" % args.out_models_supported_alleles_rst)
    if args.out_models_architecture_png:
        # Architecture diagram
        if predictor is None:
            predictor = Class1AffinityPredictor.load(args.class1_models_dir)
        network = predictor.neural_networks[0].network()
        plot_model(
            network,
            to_file=args.out_models_architecture_png,
            show_layer_names=True,
            show_shapes=True)
        print("Wrote: %s" % args.out_models_architecture_png)

    if args.out_models_info_rst:
        # Architecture information rst
        if predictor is None:
            predictor = Class1AffinityPredictor.load(args.class1_models_dir)
        network = predictor.neural_networks[0].network()
        lines = []
        network.summary(print_fn=lines.append)
        with open(args.out_models_info_rst, "w") as fd:
            fd.write("Layers and parameters summary: ")
            fd.write("\n\n::\n\n")
            for line in lines:
                # Indent each line so the summary renders as an rst literal block.
                fd.write("    ")
                fd.write(line)
                fd.write("\n")
        print("Wrote: %s" % args.out_models_info_rst)
    if args.out_models_cv_rst:
        # Models cv output
        df = pandas.read_csv(args.cv_summary_csv)
        # .copy() avoids pandas SettingWithCopyWarning on the assignments below.
        sub_df = df.loc[
            df.kind == "ensemble"
        ].copy()
        sub_df["Allele"] = sub_df.allele
        sub_df["CV Training Size"] = sub_df.train_size.astype(int)
        sub_df["AUC"] = sub_df.auc
        sub_df["F1"] = sub_df.f1
        sub_df["Kendall Tau"] = sub_df.tau
        sub_df = sub_df[sub_df.columns[-5:]]
        html = sub_df.to_html(
            index=False,
            float_format=lambda v: "%0.3f" % v,
            justify="left")
        rst = pypandoc.convert_text(html, format="html", to="rst")
        with open(args.out_models_cv_rst, "w") as fd:
            fd.write(
                "Showing estimated performance for %d alleles." % len(sub_df))
            fd.write("\n\n")
            fd.write(rst)
        print("Wrote: %s" % args.out_models_cv_rst)

if __name__ == "__main__":
    go(sys.argv[1:])