Skip to content
Snippets Groups Projects
Commit 384dab34 authored by Tim O'Donnell
Browse files

better docs

parent 0cc50935
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ from __future__ import print_function ...@@ -5,6 +5,7 @@ from __future__ import print_function
import sys import sys
import argparse import argparse
from collections import OrderedDict, defaultdict from collections import OrderedDict, defaultdict
import os
from os.path import join, exists from os.path import join, exists
from os import mkdir from os import mkdir
...@@ -35,7 +36,7 @@ parser.add_argument( ...@@ -35,7 +36,7 @@ parser.add_argument(
) )
parser.add_argument( parser.add_argument(
"--logo-cutoff", "--logo-cutoff",
default=0.01, default=0.001,
type=float, type=float,
help="Fraction of top to use for motifs", help="Fraction of top to use for motifs",
) )
...@@ -46,10 +47,18 @@ parser.add_argument( ...@@ -46,10 +47,18 @@ parser.add_argument(
help="Fraction of top to use for length distribution", help="Fraction of top to use for length distribution",
) )
parser.add_argument( parser.add_argument(
"--lengths", "--length-distribution-lengths",
nargs="+",
default=[8, 9, 10, 11, 12, 13, 14, 15],
type=int,
help="Peptide lengths for length distribution plots",
)
parser.add_argument(
"--motif-lengths",
nargs="+",
default=[8, 9, 10, 11], default=[8, 9, 10, 11],
type=int, type=int,
help="Peptide lengths", help="Peptide lengths for motif plots",
) )
parser.add_argument( parser.add_argument(
"--out-dir", "--out-dir",
...@@ -59,6 +68,7 @@ parser.add_argument( ...@@ -59,6 +68,7 @@ parser.add_argument(
) )
parser.add_argument( parser.add_argument(
"--max-alleles", "--max-alleles",
default=None,
type=int, type=int,
metavar="N", metavar="N",
help="Only use N alleles (for testing)", help="Only use N alleles (for testing)",
...@@ -70,6 +80,8 @@ def model_info(models_dir): ...@@ -70,6 +80,8 @@ def model_info(models_dir):
join(models_dir, "length_distributions.csv.bz2")) join(models_dir, "length_distributions.csv.bz2"))
frequency_matrices_df = pandas.read_csv( frequency_matrices_df = pandas.read_csv(
join(models_dir, "frequency_matrices.csv.bz2")) join(models_dir, "frequency_matrices.csv.bz2"))
train_data_df = pandas.read_csv(
join(models_dir, "train_data.csv.bz2"))
distribution = frequency_matrices_df.loc[ distribution = frequency_matrices_df.loc[
(frequency_matrices_df.cutoff_fraction == 1.0), AMINO_ACIDS (frequency_matrices_df.cutoff_fraction == 1.0), AMINO_ACIDS
...@@ -79,9 +91,13 @@ def model_info(models_dir): ...@@ -79,9 +91,13 @@ def model_info(models_dir):
normalized_frequency_matrices.loc[:, AMINO_ACIDS] = ( normalized_frequency_matrices.loc[:, AMINO_ACIDS] = (
normalized_frequency_matrices[AMINO_ACIDS] / distribution) normalized_frequency_matrices[AMINO_ACIDS] / distribution)
observations_per_allele = (
train_data_df.groupby("allele").peptide.nunique().to_dict())
return { return {
'length_distributions': length_distributions_df, 'length_distributions': length_distributions_df,
'normalized_frequency_matrices': normalized_frequency_matrices, 'normalized_frequency_matrices': normalized_frequency_matrices,
'observations_per_allele': observations_per_allele,
} }
...@@ -114,9 +130,11 @@ def write_logo( ...@@ -114,9 +130,11 @@ def write_logo(
) )
pyplot.title("%s %d-mer (%s)" % (allele, length, models_label)) pyplot.title("%s %d-mer (%s)" % (allele, length, models_label))
pyplot.xticks(matrix.index.values) pyplot.xticks(matrix.index.values)
name = "%s_%dmer.%s.png" % (allele, length, models_label) name = "%s-%dmer.%s.png" % (
pyplot.savefig(join(out_dir, name)) allele.replace("*", "-").replace(":", "-"), length, models_label)
print("Wrote: ", name) filename = os.path.abspath(join(out_dir, name))
pyplot.savefig(filename)
print("Wrote: ", filename)
fig.clear() fig.clear()
pyplot.close(fig) pyplot.close(fig)
return name return name
...@@ -140,9 +158,12 @@ def write_length_distribution( ...@@ -140,9 +158,12 @@ def write_length_distribution(
pyplot.xlabel("") pyplot.xlabel("")
pyplot.xticks(rotation=0) pyplot.xticks(rotation=0)
pyplot.gca().get_legend().remove() pyplot.gca().get_legend().remove()
name = "%s.lengths.%s.png" % (allele, models_label) name = "%s.lengths.%s.png" % (
pyplot.savefig(join(out_dir, name)) allele.replace("*", "-").replace(":", "-"), models_label)
print("Wrote: ", name)
filename = os.path.abspath(join(out_dir, name))
pyplot.savefig(filename)
print("Wrote: ", filename)
fig.clear() fig.clear()
pyplot.close(fig) pyplot.close(fig)
return name return name
...@@ -208,7 +229,7 @@ def go(argv): ...@@ -208,7 +229,7 @@ def go(argv):
length_distribution_image_path = write_length_distribution( length_distribution_image_path = write_length_distribution(
length_distributions_df=length_distribution, length_distributions_df=length_distribution,
allele=allele, allele=allele,
lengths=args.lengths, lengths=args.length_distribution_lengths,
cutoff=args.length_cutoff, cutoff=args.length_cutoff,
out_dir=args.out_dir, out_dir=args.out_dir,
models_label=label) models_label=label)
...@@ -219,9 +240,12 @@ def go(argv): ...@@ -219,9 +240,12 @@ def go(argv):
"*" + ( "*" + (
"With mass-spec" if label == "with_mass_spec" else "Affinities only") "With mass-spec" if label == "with_mass_spec" else "Affinities only")
+ "*\n") + "*\n")
w("Training observations (unique peptides): %d" % (
info['observations_per_allele'].get(allele, 0)))
w("\n")
w(image(length_distribution_image_path)) w(image(length_distribution_image_path))
for length in args.lengths: for length in args.motif_lengths:
w(image(write_logo( w(image(write_logo(
normalized_frequency_matrices=normalized_frequency_matrices, normalized_frequency_matrices=normalized_frequency_matrices,
allele=allele, allele=allele,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment