Skip to content
Snippets Groups Projects
Commit 14cf5749 authored by Tim O'Donnell
Browse files

codes cleanup

parent ec75a7b6
No related merge requests found
......@@ -20,10 +20,11 @@ class PercentRankTransform(object):
assert self.bin_edges is None
assert len(values) > 0
(hist, self.bin_edges) = numpy.histogram(values, bins=self.n_bins)
self.cdf = numpy.ones(len(hist) + 2) * numpy.nan
self.cdf = numpy.ones(len(hist) + 3) * numpy.nan
self.cdf[0] = 0.0
self.cdf[1] = 0.0
self.cdf[-1] = 100.0
numpy.cumsum(hist / numpy.sum(hist) * 100.0, out=self.cdf[1:-1])
numpy.cumsum(hist / numpy.sum(hist) * 100.0, out=self.cdf[2:-1])
assert not numpy.isnan(self.cdf).any()
def transform(self, values):
......
......@@ -144,6 +144,8 @@ class MHCflurryTrainedOnHits(PresentationComponentModel):
def fit(self, hits_df):
assert 'experiment_name' in hits_df.columns
assert 'peptide' in hits_df.columns
if 'hit' in hits_df.columns:
assert (hits_df.hit == 1).all()
grouped = hits_df.groupby("experiment_name")
for (experiment_name, sub_df) in grouped:
......
......@@ -53,7 +53,7 @@ class PresentationComponentModel(object):
"""
raise NotImplementedError(str(self))
def clone_and_fit(self, peptides_df):
def clone_and_fit(self, hits_df):
"""
Clone the object and fit to given dataset with a weakref cache.
"""
......@@ -65,12 +65,12 @@ class PresentationComponentModel(object):
result = None
else:
key = dataframe_cryptographic_hash(
peptides_df[["experiment_name", "peptide"]])
hits_df[["experiment_name", "peptide"]])
result = self.cached_fits.get(key)
if result is None:
print("Cache miss in clone_and_fit: %s" % str(self))
result = self.clone()
result.fit(peptides_df)
result.fit(hits_df)
if self.cached_fits is not None:
self.cached_fits[key] = result
else:
......@@ -229,6 +229,9 @@ class PresentationComponentModel(object):
self.cached_predictions[cache_key] = return_value
return return_value
def fit_ensemble_and_predict(self, peptides_df):
    """
    Placeholder for fitting an ensemble and predicting on peptides_df.

    Not implemented here; always raises NotImplementedError.

    The explicit `self` parameter is required: without it, a call such as
    `model.fit_ensemble_and_predict(df)` fails with a TypeError about the
    argument count instead of reaching the NotImplementedError below.
    """
    raise NotImplementedError
def reset_cache(self):
for key in PresentationComponentModel.cache_fields:
obj_type = type(getattr(self, key))
......
......@@ -321,7 +321,6 @@ class PresentationModel(object):
assert len(self.presentation_models_predictors) == \
len(self.trained_component_models)
# peptides_df = peptides_df.reset_index(drop=True)
prediction_cols = []
presentation_model_predictions = {}
zipped = enumerate(
......@@ -343,20 +342,6 @@ class PresentationModel(object):
x_df = self.evaluate_expressions(df)
assert_no_null(x_df)
"""
with pandas.option_context('mode.use_inf_as_null', True):
null_x = x_df.ix[x_df.isnull().sum(axis=1) > 0]
if len(null_x) > 0:
null_x = null_x.copy()
null_x["peptide"] = peptides_df.ix[null_x.index, "peptide"]
null_x["experiment_name"] = peptides_df.ix[
null_x.index, "experiment_name"
]
raise ValueError(
"Null values in features:\n%s\ninputs:\n%s" % (
str(null_x), str(df.iloc[null_x.index])))
"""
prediction_col = "Prediction (Model %d)" % (i + 1)
assert prediction_col not in presentation_model_predictions
presentation_model_predictions[prediction_col] = (
......@@ -370,7 +355,10 @@ class PresentationModel(object):
del presentation_model_predictions[prediction_cols[0]]
else:
presentation_model_predictions["Prediction"] = numpy.mean(
[presentation_model_predictions[col] for col in prediction_cols],
[
presentation_model_predictions[col]
for col in prediction_cols
],
axis=0)
return pandas.DataFrame(presentation_model_predictions)
......
from nose.tools import eq_, assert_less
import numpy
from numpy.testing import assert_almost_equal
import pandas
from mhcflurry import amino_acid
from mhcflurry.antigen_presentation import (
decoy_strategies,
percent_rank_transform,
presentation_component_models,
presentation_model)
......@@ -62,6 +64,7 @@ PEPTIDES_DF["hit"] = [
for _, row in
PEPTIDES_DF.iterrows()
]
# Report the mean of the "hit" column as the overall hit rate.
print("Hit rate: %0.3f" % PEPTIDES_DF.hit.mean())

# Select rows using the "hit" column as the indexer (presumably a boolean
# mask -- TODO confirm the dtype of PEPTIDES_DF.hit), then reset the index.
# reset_index() is called without drop=True, so the previous index is kept
# as a column in the result.
HITS_DF = PEPTIDES_DF.ix[PEPTIDES_DF.hit].reset_index().copy()

# Remove the "hit" column from the selected rows.
del HITS_DF["hit"]
......@@ -70,6 +73,14 @@ del HITS_DF["hit"]
# Tests
def test_percent_rank_transform():
    """
    Fit a PercentRankTransform on the integers 0..999 and check the values
    it returns for queries below, inside, and above the fitted range.
    """
    training_values = numpy.arange(1000)
    queries = [-2, 0, 50, 100, 2000]
    expected_ranks = [0.0, 0.0, 5.0, 10.0, 100.0]

    transformer = percent_rank_transform.PercentRankTransform()
    transformer.fit(training_values)
    observed_ranks = transformer.transform(queries)
    assert_almost_equal(observed_ranks, expected_ranks)
def test_mhcflurry_trained_on_hits():
mhcflurry_model = presentation_component_models.MHCflurryTrainedOnHits(
"basic",
......@@ -100,11 +111,11 @@ def test_presentation_model():
experiment_to_expression_group=EXPERIMENT_TO_EXPRESSION_GROUP,
transcripts=TRANSCIPTS_DF,
peptides_and_transcripts=PEPTIDES_AND_TRANSCRIPTS_DF,
random_peptides_for_percent_rank=make_random_peptides(10000, 9),
random_peptides_for_percent_rank=make_random_peptides(1000, 9),
)
decoys = decoy_strategies.UniformRandom(
make_random_peptides(10000, 9),
make_random_peptides(1000, 9),
decoys_per_hit=50)
terms = {
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment