diff --git a/.travis.yml b/.travis.yml
index c99940c62b6eedd6a0e3442b40fb85bd3bb0e8a0..1ad1f31e1660b6e43ae6b3db62820bc26f58561a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,8 +9,10 @@ before_install:
   # version is the same.
   - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then
       wget https://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh;
+      export TF_PACKAGE=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.10.1-cp27-none-linux_x86_64.whl
     else
       wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+      export TF_PACKAGE=https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-1.10.1-cp35-cp35m-linux_x86_64.whl
     fi
   - bash miniconda.sh -b -p $HOME/miniconda
   - export PATH="$HOME/miniconda/bin:$PATH"
@@ -29,9 +31,11 @@ addons:
 install:
   - >
     conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION
-    numpy scipy nose pandas matplotlib mkl-service
+    numpy scipy nose pandas matplotlib mkl-service theano
   - source activate test-environment
-  - pip install tensorflow pypandoc pylint 'theano>=1.0.4'
+  - pip install pypandoc pylint
+  - pip install --ignore-installed --upgrade $TF_PACKAGE
   - pip install -r requirements.txt
   - pip install .
   - pip freeze
diff --git a/mhcflurry/allele_encoding_transforms.py b/mhcflurry/allele_encoding_transforms.py
index 300925863007914da11f6de86521900d18bdaf57..88b361f52706116f516f8061734a721563ad0e89 100644
--- a/mhcflurry/allele_encoding_transforms.py
+++ b/mhcflurry/allele_encoding_transforms.py
@@ -25,13 +25,13 @@ class AlleleEncodingTransform(object):
 
         Parameters
         ----------
-        fit : string to DataFrame
+        fit : string to array
         """
 
 
 class PCATransform(AlleleEncodingTransform):
     name = 'pca'
-    serialization_keys = ['data']
+    serialization_keys = ['mean', 'components']
 
     def __init__(self):
         self.model = None
@@ -51,19 +51,15 @@ class PCATransform(AlleleEncodingTransform):
         print("Fit complete in %0.3f sec." % (time.time() - start))
 
     def get_fit(self):
-        df = pandas.DataFrame(self.model.components_)
-        df.columns = ["pca_%s" % c for c in df.columns]
-        df["mean"] = self.model.mean_
         return {
-            'data': df
+            'mean': self.model.mean_,
+            'components': self.model.components_,
         }
 
     def restore_fit(self, fit):
-        assert list(fit) == ['data']
-        data = fit["data"]
         self.model = sklearn.decomposition.PCA()
-        self.model.mean_ = data["mean"].values
-        self.model.components_ = data.drop(columns="mean").values
+        self.model.mean_ = fit["mean"]
+        self.model.components_ = fit["components"]
 
     def transform(self, allele_representations):
         if not self.is_fit():
diff --git a/mhcflurry/class1_affinity_predictor.py b/mhcflurry/class1_affinity_predictor.py
index 211ca1ad508d2fda72aa92f4880ec2a486b53bf7..1e9ad71f05d1b1c0a62b731c0ba08bdd36e4aee2 100644
--- a/mhcflurry/class1_affinity_predictor.py
+++ b/mhcflurry/class1_affinity_predictor.py
@@ -368,12 +368,9 @@ class Class1AffinityPredictor(object):
             if transform.is_fit():
                 fit_data = transform.get_fit()
                 assert set(fit_data) == set(transform.serialization_keys)
-                for (serialization_key, fit_df) in fit_data.items():
-                    csv_path = join(
-                        models_dir,
-                        "%s.%s.csv" % (transform.name, serialization_key))
-                    fit_df.to_csv(csv_path)
-                    logging.info("Wrote: %s" % csv_path)
+                target_path = join(models_dir, "%s.npz" % transform.name)
+                numpy.savez(target_path, **fit_data)
+                logging.info("Wrote: %s" % target_path)
 
         if self.allele_to_percent_rank_transform:
             percent_ranks_df = None
@@ -447,15 +444,11 @@ class Class1AffinityPredictor(object):
         for transform_name in ALLELE_ENCODING_TRANSFORMS:
             klass = ALLELE_ENCODING_TRANSFORMS[transform_name]
             transform = klass()
-            restored_fit = {}
-            for serialization_key in klass.serialization_keys:
-                csv_path = join(
-                    models_dir,
-                    "%s.%s.csv" % (transform_name, serialization_key))
-                if exists(csv_path):
-                    restored_fit[serialization_key] = pandas.read_csv(
-                        csv_path, index_col=0)
-            if restored_fit:
+            target_path = join(models_dir, "%s.npz" % transform_name)
+            if exists(target_path):
+                with numpy.load(target_path) as loaded:
+                    restored_fit = dict(
+                        (key, loaded[key]) for key in loaded.keys())
                 if set(restored_fit) != set(klass.serialization_keys):
                     logging.warning(
                         "Missing some allele encoding transform serialization "
@@ -1116,12 +1109,11 @@ class Class1AffinityPredictor(object):
         ----------
         list of array
         """
-        loaded = numpy.load(filename)
-        weights = [
-            loaded["array_%d" % i]
-            for i in range(len(loaded.keys()))
-        ]
-        loaded.close()
+        with numpy.load(filename) as loaded:
+            weights = [
+                loaded["array_%d" % i]
+                for i in range(len(loaded.keys()))
+            ]
        return weights
 
     def calibrate_percentile_ranks(
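
Note on the serialization change above: per-key CSV files are replaced by a single .npz archive per transform. A minimal stand-alone sketch of that round-trip follows (not mhcflurry code; the "pca.npz" path and the random toy data are illustrative assumptions), mirroring the new get_fit / restore_fit logic.

import numpy
import sklearn.decomposition

# Toy matrix standing in for the allele representations (illustrative only).
data = numpy.random.RandomState(0).rand(100, 21)

model = sklearn.decomposition.PCA()
model.fit(data)

# Save: one archive holding both arrays, keyed like serialization_keys.
fit_data = {"mean": model.mean_, "components": model.components_}
numpy.savez("pca.npz", **fit_data)

# Restore into a fresh PCA by assigning the fitted attributes, as restore_fit does.
# Using the NpzFile as a context manager closes the file handle when done.
restored = sklearn.decomposition.PCA()
with numpy.load("pca.npz") as loaded:
    restored.mean_ = loaded["mean"]
    restored.components_ = loaded["components"]

# Both models now project data identically.
assert numpy.allclose(model.transform(data), restored.transform(data))

Reading the NpzFile through a with block, as in load_weights above, avoids the explicit loaded.close() that the old code needed.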