Skip to content
Snippets Groups Projects
Commit 8f911c38 authored by Jeff Hammerbacher
Browse files

Add a notebook trying to build a mhcflurry-like model from scratch to run on TF backend.

parent f9b61edf
Branches
Tags
No related merge requests found
%% Cell type:code id: tags:
``` python
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import tensorflow as tf
import seaborn as sns
from mhcflurry.dataset import Dataset
from mhcflurry.peptide_encoding import indices_to_hotshot_encoding
```
%% Cell type:code id: tags:
``` python
# Path to the combined human class I CSV under the local mhcflurry data directory.
file_to_explore = "/root/.local/share/mhcflurry/2/class1_data/combined_human_class1_dataset.csv"

# Parse the comma-separated file into an mhcflurry Dataset; the peptide
# sequences are read from the column named "peptide".
dataset = Dataset.from_csv(
    filename=file_to_explore,
    sep=",",
    peptide_column_name="peptide",
)
```
%% Cell type:code id: tags:
``` python
# Convert the mhcflurry Dataset into a DataFrame and display its column labels.
df = dataset.to_dataframe()
df.columns
```
%% Output
Index([u'species', u'allele', u'peptide', u'peptide_length', u'affinity', u'sample_weight'], dtype='object')
%% Cell type:code id: tags:
``` python
# Show the ten most frequent affinity values among human measurements,
# sorted ascending by how often each value occurs.
# Series.order() was deprecated in pandas 0.17 and removed in later releases;
# sort_values() is its supported replacement and yields the same ordering.
human_rows = df[df.species == 'human']
human_rows.groupby('affinity').size().sort_values().tail(10)
```
%% Output
affinity
77922.077922 530
69767.441860 656
70000.000000 761
77777.777778 796
50000.000000 809
70422.535211 816
78125.000000 1382
5000.000000 2124
1.000000 3204
20000.000000 48651
dtype: int64
%% Cell type:code id: tags:
``` python
# Restrict the data to four HLA-A alleles and keep only the two columns
# needed for the per-allele comparison; renumber the rows from zero.
alleles_of_interest = ['HLA-A0201', 'HLA-A2301', 'HLA-A2402', 'HLA-A1101']
allele_mask = df.allele.isin(alleles_of_interest)
df_reduced = df.loc[allele_mask, ['allele', 'affinity']].reset_index(drop=True)
```
%% Cell type:code id: tags:
``` python
# Plot the distribution of natural-log affinity for each selected allele.
log_affinity = np.log(df_reduced['affinity'])
sns.violinplot(x=df_reduced['allele'], y=log_affinity)
```
%% Output
<matplotlib.axes.AxesSubplot at 0x7f2e61cac310>
%% Cell type:code id: tags:
``` python
# Count how many rows each of the selected alleles contributes.
df_reduced.groupby('allele').size()
```
%% Output
allele
HLA-A0201 12215
HLA-A1101 6343
HLA-A2301 2509
HLA-A2402 3320
dtype: int64
%% Cell type:code id: tags:
``` python
# Encode the dataset with Dataset.kmer_index_encoding(). The result is indexed
# positionally below: element [0] is passed to indices_to_hotshot_encoding and
# element [1] is used as the training targets.
# NOTE(review): despite the df_ prefix this is presumably a tuple of arrays,
# not a DataFrame -- confirm against the mhcflurry Dataset API.
df_kmers = dataset.kmer_index_encoding()
```
%% Cell type:code id: tags:
``` python
# Expand the index-encoded k-mers (first element of the encoding result)
# into the "hotshot" representation used as the model input.
kmer_indices = df_kmers[0]
training_hotshot = indices_to_hotshot_encoding(kmer_indices)
```
%% Cell type:code id: tags:
``` python
from keras.layers import Activation, Dense
from keras.models import Sequential

# Minimal network: one dense unit over the flattened 9 * 21 input vector
# (presumably 9 peptide positions x 21 symbols -- TODO confirm), squashed by
# a sigmoid and trained with mean squared error using RMSprop.
n_inputs = 9 * 21
model = Sequential([
    Dense(input_dim=n_inputs, output_dim=1),
    Activation("sigmoid"),
])
model.compile(optimizer="rmsprop", loss="mse")
```
%% Cell type:code id: tags:
``` python
# Display which computational backend Keras is currently configured to use.
# NOTE(review): _BACKEND is a private attribute; newer Keras releases expose
# keras.backend.backend() for this -- confirm it exists in the installed version.
from keras import backend
backend._BACKEND
```
%% Output
u'theano'
%% Cell type:code id: tags:
``` python
# The network ends in a sigmoid, so its output always lies in (0, 1). Fitting
# it against unscaled targets is what produces the ~2e9 mean squared error.
# Rescale the targets (presumably IC50 affinities in nM -- TODO confirm against
# kmer_index_encoding) onto [0, 1] with a log transform: a value of 1 maps to
# 1.0, and anything at or above the cap maps to 0.0.
max_ic50 = 50000.0  # assumed cap for the log scaling; adjust if the data warrants
training_targets = np.clip(
    1.0 - np.log(df_kmers[1]) / np.log(max_ic50), 0.0, 1.0)

# batch_size=1 performs one weight update and one progress-bar write per
# sample, which is very slow and is where the notebook's stdout write failed
# ("I/O operation on closed file"). Train on mini-batches and log one line
# per epoch (verbose=2) instead.
model.fit(training_hotshot, training_targets,
          nb_epoch=5, batch_size=128, verbose=2)
```
%% Output
Epoch 1/5
74449/688472 [==>...........................] - ETA: 638s - loss: 2214902270.2741
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-113-abfac20bed23> in <module>()
----> 1 model.fit(training_hotshot, df_kmers[1], nb_epoch=5, batch_size=1)
/usr/local/lib/python2.7/dist-packages/keras/models.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, **kwargs)
407 shuffle=shuffle,
408 class_weight=class_weight,
--> 409 sample_weight=sample_weight)
410
411 def evaluate(self, x, y, batch_size=32, verbose=1,
/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc in fit(self, x, y, batch_size, nb_epoch, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight)
1050 verbose=verbose, callbacks=callbacks,
1051 val_f=val_f, val_ins=val_ins, shuffle=shuffle,
-> 1052 callback_metrics=callback_metrics)
1053
1054 def evaluate(self, x, y, batch_size=32, verbose=1, sample_weight=None):
/usr/local/lib/python2.7/dist-packages/keras/engine/training.pyc in _fit_loop(self, f, ins, out_labels, batch_size, nb_epoch, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics)
794 batch_logs[l] = o
795
--> 796 callbacks.on_batch_end(batch_index, batch_logs)
797
798 epoch_logs = {}
/usr/local/lib/python2.7/dist-packages/keras/callbacks.pyc in on_batch_end(self, batch, logs)
58 t_before_callbacks = time.time()
59 for callback in self.callbacks:
---> 60 callback.on_batch_end(batch, logs)
61 self._delta_ts_batch_end.append(time.time() - t_before_callbacks)
62 delta_t_median = np.median(self._delta_ts_batch_end)
/usr/local/lib/python2.7/dist-packages/keras/callbacks.pyc in on_batch_end(self, batch, logs)
186 # will be handled by on_epoch_end
187 if self.verbose and self.seen < self.params['nb_sample']:
--> 188 self.progbar.update(self.seen, self.log_values)
189
190 def on_epoch_end(self, epoch, logs={}):
/usr/local/lib/python2.7/dist-packages/keras/utils/generic_utils.pyc in update(self, current, values)
66 if self.verbose == 1:
67 prev_total_width = self.total_width
---> 68 sys.stdout.write("\b" * prev_total_width)
69 sys.stdout.write("\r")
70
/usr/local/lib/python2.7/dist-packages/ipykernel/iostream.pyc in write(self, string)
315
316 is_child = (not self._is_master_process())
--> 317 self._buffer.write(string)
318 if is_child:
319 # newlines imply flush in subprocesses
ValueError: I/O operation on closed file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment