update readme; remove pandoc from setup.py

d6d0fc9a · Tim O'Donnell · 2aa3ed5d · d6d0fc9a · d6d0fc9a · d6d0fc9a
Commit d6d0fc9a authored 7 years ago by Tim O'Donnell
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -63,15 +63,11 @@ generate:
 # Added by Tim:
 .PHONY: readme
 readme: text
+	rm -f package_readme/readme.generated.rst
 	cat package_readme/readme_header.rst \
 	    _build/text/package_readme/readme.template.txt \
 	    > package_readme/readme.generated.rst
-	#pandoc -B package_readme/readme_header.rst \
-	#    -f rst \
-	#    -t rst \
-	#    --base-header-level 2 \
-	#    _build/text/package_readme/readme.template.txt \
-	#    -o package_readme/readme.generated.rst
+	chmod 444 package_readme/readme.generated.rst  # read only

 .PHONY: clean
 clean:

--- a/docs/commandline_tutorial.rst
+++ b/docs/commandline_tutorial.rst
@@ -27,6 +27,7 @@ be customized with the ``--models`` argument. See ``mhcflurry-predict -h`` for
 details.

 .. command-output:: mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ
+    :nostderr:

 The predictions returned are affinities (KD) in nM. The ``prediction_low`` and
 ``prediction_high`` fields give the 5-95 percentile predictions across
@@ -43,3 +44,39 @@ You can also specify the input and output as CSV files. Run
 Fitting your own models
 -----------------------

+Scanning protein sequences for predicted epitopes
+-------------------------------------------------
+
+The `mhctools <https://github.com/hammerlab/mhctools>`__ package
+provides support for scanning protein sequences to find predicted
+epitopes. It supports MHCflurry as well as other binding predictors.
+Here is an example.
+
+First, install ``mhctools`` if it is not already installed:
+
+.. code:: shell
+
+    $ pip install mhctools
+
+We'll generate predictions across ``example.fasta``, a FASTA file with two short
+sequences:
+
+.. literalinclude:: /example.fasta
+
+Here's the ``mhctools`` invocation. See ``mhctools -h`` for more information.
+
+.. command-output::
+    mhctools
+        --mhc-predictor mhcflurry
+        --input-fasta-file example.fasta
+        --mhc-alleles A02:01,A03:01
+        --mhc-peptide-lengths 8,9,10,11
+        --extract-subsequences
+        --output-csv /tmp/result.csv
+    :ellipsis: 2,-2
+    :nostderr:
+
+This will write a file giving predictions for all subsequences of the specified lengths:
+
+.. command-output::
+    head -n 3 /tmp/result.csv
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -80,6 +80,9 @@ release = version
 # Added by tim
 autodoc_member_order = 'bysource'

+# Added by tim
+suppress_warnings = ['image.nonlocal_uri']
+
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
 #

--- a/docs/example.fasta
+++ b/docs/example.fasta
+>protein1
+MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQV
+EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHH
+>protein2
+VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGA
+ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC
--- a/docs/generate.py
+++ b/docs/generate.py
 """
-Generate models report.
+Generate certain RST files used in documentation.
 """

 import sys

--- a/docs/package_readme/readme.generated.rst
+++ b/docs/package_readme/readme.generated.rst
+:orphan:
+
 .. image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master
    :target: https://travis-ci.org/hammerlab/mhcflurry

@@ -15,7 +17,7 @@ open source implementation.

 You can download pre-trained MHCflurry models fit to affinity
 measurements deposited in IEDB. See the
-"downloads_generation/models_class1" directory in the repository for
+“downloads_generation/models_class1” directory in the repository for
 the workflow used to train these predictors. Users with their own data
 can also fit their own MHCflurry models.

@@ -30,7 +32,7 @@ GPUs may optionally be used for a generally modest speed improvement.

 If you find MHCflurry useful in your research please cite:

-   O'Donnell, T. et al., 2017. MHCflurry: open-source class I MHC
+   O’Donnell, T. et al., 2017. MHCflurry: open-source class I MHC
   binding affinity prediction. bioRxiv. Available at:
   http://www.biorxiv.org/content/early/2017/08/09/174243.

@@ -79,7 +81,7 @@ Most users will use pre-trained MHCflurry models that we release.
 These models are distributed separately from the source code and may
 be downloaded with the following command:

-We also release other "downloads," such as curated training data and
+We also release other “downloads,” such as curated training data and
 some experimental models. To see what you have downloaded, run:


@@ -92,8 +94,6 @@ downloaded above but this can be customized with the "--models"
 argument. See "mhcflurry-predict -h" for details.

   $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ
-   2017-12-21 13:15:45.075649: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA
-   Using TensorFlow backend.
   allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high,mhcflurry_prediction_percentile
   HLA-A0201,SIINFEKL,4899.047843425702,2767.7636539507857,7269.683642935029,6.509787499999997
   HLA-A0201,SIINFEKD,21050.420242970613,16834.65859138968,24129.046091695887,34.297175
@@ -120,6 +120,52 @@ Fitting your own models
 ***********************


+Scanning protein sequences for predicted epitopes
+*************************************************
+
+The mhctools package provides support for scanning protein sequences
+to find predicted epitopes. It supports MHCflurry as well as other
+binding predictors. Here is an example.
+
+First, install "mhctools" if it is not already installed:
+
+   $ pip install mhctools
+
+We’ll generate predictions across "example.fasta", a FASTA file with
+two short sequences:
+
+   >protein1
+   MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQV
+   EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHH
+   >protein2
+   VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGA
+   ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC
+
+Here’s the "mhctools" invocation. See "mhctools -h" for more
+information.
+
+   $ mhctools
+       --mhc-predictor mhcflurry
+       --input-fasta-file example.fasta
+       --mhc-alleles A02:01,A03:01
+       --mhc-peptide-lengths 8,9,10,11
+       --extract-subsequences
+       --output-csv /tmp/result.csv
+   2017-12-21 14:13:47,847 - mhctools.cli.args - INFO - Building MHC binding prediction type for alleles ['HLA-A*02:01', 'HLA-A*03:01'] and epitope lengths [8, 9, 10, 11]
+   2017-12-21 14:13:52,753 - mhctools.cli.script - INFO - 
+   ...
+   [1192 rows x 8 columns]
+   Wrote: /tmp/result.csv
+
+This will write a file giving predictions for all subsequences of the
+specified lengths:
+
+   $ head -n 3 /tmp/result.csv
+   source_sequence_name,offset,peptide,allele,affinity,percentile_rank,prediction_method_name,length
+   protein2,42,AARYSAFY,HLA-A*02:01,33829.639361000336,73.7865875,mhcflurry,8
+   protein2,42,AARYSAFYN,HLA-A*02:01,29747.41688667342,60.34871249999998,mhcflurry,9
+
+
 Library usage
 =============

@@ -326,3 +372,153 @@ peptides of length 8-15 and the following 124 alleles:
   Mamu-B*17:04, Mamu-B*39:01, Mamu-B*52:01, Mamu-B*66:01, Mamu-B*83:01,
   Mamu-B*87:01, Patr-A*01:01, Patr-A*03:01, Patr-A*04:01, Patr-A*07:01,
   Patr-A*09:01, Patr-B*01:01, Patr-B*13:01, Patr-B*24:01
+
+[image: Build Status][image] [image: Coverage Status][image]
+
+
+mhcflurry
+=========
+
+Open source neural network models for peptide-MHC binding affinity
+prediction
+
+The adaptive immune system depends on the presentation of protein
+fragments by MHC molecules. Machine learning models of this
+interaction are used in studies of infectious diseases, autoimmune
+diseases, vaccine development, and cancer immunotherapy.
+
+MHCflurry supports Class I peptide/MHC binding affinity prediction
+using ensembles of allele-specific models. You can fit MHCflurry
+models to your own data or download models that we fit to data from
+IEDB and Kim 2014. Our combined dataset is available for download
+here.
+
+Pan-allelic prediction is supported in principle but is not yet
+performing accurately. Infrastructure for modeling other aspects of
+antigen processing is also implemented but experimental.
+
+If you find MHCflurry useful in your research please cite:
+
+   O’Donnell, T. et al., 2017. MHCflurry: open-source class I MHC
+   binding affinity prediction. bioRxiv. Available at:
+   http://www.biorxiv.org/content/early/2017/08/09/174243.
+
+
+Setup (pip)
+***********
+
+Install the package:
+
+   pip install mhcflurry
+
+Then download our datasets and trained models:
+
+   mhcflurry-downloads fetch
+
+From a checkout you can run the unit tests with:
+
+   nosetests .
+
+The MHCflurry predictors are implemented in Python using keras.
+
+MHCflurry works with both the tensorflow and theano keras backends.
+The tensorflow backend gives faster model-loading time but is
+undergoing more rapid development and sometimes hits issues. If you
+encounter tensorflow errors running MHCflurry, try setting this
+environment variable to switch to the theano backend:
+
+   export KERAS_BACKEND=theano
+
+You may also need to "pip install theano".
+
+
+Setup (conda)
+*************
+
+You can alternatively get up and running with a conda environment as
+follows:
+
+   conda create -q -n mhcflurry-env python=3.6 'tensorflow>=1.1.0'
+   source activate mhcflurry-env
+
+Then continue as above:
+
+   pip install mhcflurry
+   mhcflurry-downloads fetch
+
+If you wish to test your installation, you can install "nose" and run
+the tests from a checkout:
+
+   pip install nose
+   nosetests .
+
+
+Making predictions from the command-line
+****************************************
+
+   $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ
+   allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high
+   HLA-A0201,SIINFEKL,5326.541919062165,3757.86675352994,7461.37693353508
+   HLA-A0201,SIINFEKD,18763.70298522213,13140.82000240037,23269.82139560844
+   HLA-A0201,SIINFEKQ,18620.10057358322,13096.425874678192,23223.148184869413
+   HLA-A0301,SIINFEKL,24481.726678691946,21035.52779725433,27245.371837497867
+   HLA-A0301,SIINFEKD,24687.529360239587,21582.590014592537,27749.39869616437
+   HLA-A0301,SIINFEKQ,25923.062203902562,23522.5793450799,28079.456657427705
+
+The predictions returned are affinities (KD) in nM. The
+"prediction_low" and "prediction_high" fields give the 5-95 percentile
+predictions across the models in the ensemble. The predictions above
+were generated with MHCflurry 0.9.2. Your exact predictions may vary
+slightly from these (up to about 1 nM) depending on the Keras backend
+in use and other numerical details. Different versions of MHCflurry
+can of course give results considerably different from these.
+
+You can also specify the input and output as CSV files. Run
+"mhcflurry-predict -h" for details.
+
+
+Making predictions from Python
+******************************
+
+   >>> from mhcflurry import Class1AffinityPredictor
+   >>> predictor = Class1AffinityPredictor.load()
+   >>> predictor.predict_to_dataframe(peptides=['SIINFEKL'], allele='A0201')
+
+
+     allele   peptide   prediction  prediction_low  prediction_high
+     A0201  SIINFEKL  6029.084473     4474.103253      7771.297702
+
+See the class1_allele_specific_models.ipynb notebook for an overview
+of the Python API, including fitting your own predictors.
+
+
+Scanning protein sequences for predicted epitopes
+*************************************************
+
+The mhctools package provides support for scanning protein sequences
+to find predicted epitopes. It supports MHCflurry as well as other
+binding predictors. Here is an example:
+
+   # First install mhctools if needed:
+   pip install mhctools
+
+   # Now generate predictions for protein sequences in FASTA format:
+   mhctools \
+       --mhc-predictor mhcflurry \
+       --input-fasta-file INPUT.fasta \
+       --mhc-alleles A02:01,A03:01 \
+       --mhc-peptide-lengths 8,9,10,11 \
+       --extract-subsequences \
+       --out RESULT.csv
+
+
+Details on the downloadable models
+**********************************
+
+
+Environment variables
+*********************
+
+The path where MHCflurry looks for model weights and data can be set
+with the "MHCFLURRY_DOWNLOADS_DIR" environment variable. This
+directory should contain subdirectories like “models_class1”.
--- a/docs/package_readme/readme.template.rst
+++ b/docs/package_readme/readme.template.rst
+:orphan:

 .. include:: /intro.rst
    :start-line: 3
@@ -7,3 +8,179 @@
 .. include:: /python_tutorial.rst

 .. include:: /models_supported_alleles.rst
+|Build Status| |Coverage Status|
+
+mhcflurry
+=========
+
+Open source neural network models for peptide-MHC binding affinity
+prediction
+
+The `adaptive immune
+system <https://en.wikipedia.org/wiki/Adaptive_immune_system>`__ depends
+on the presentation of protein fragments by
+`MHC <https://en.wikipedia.org/wiki/Major_histocompatibility_complex>`__
+molecules. Machine learning models of this interaction are used in
+studies of infectious diseases, autoimmune diseases, vaccine
+development, and cancer immunotherapy.
+
+MHCflurry supports Class I peptide/MHC binding affinity prediction using
+ensembles of allele-specific models. You can fit MHCflurry models to
+your own data or download models that we fit to data from
+`IEDB <http://www.iedb.org/home_v3.php>`__ and `Kim
+2014 <http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241>`__.
+Our combined dataset is available for download
+`here <https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2>`__.
+
+Pan-allelic prediction is supported in principle but is not yet
+performing accurately. Infrastructure for modeling other aspects of
+antigen processing is also implemented but experimental.
+
+If you find MHCflurry useful in your research please cite:
+
+    O'Donnell, T. et al., 2017. MHCflurry: open-source class I MHC
+    binding affinity prediction. bioRxiv. Available at:
+    http://www.biorxiv.org/content/early/2017/08/09/174243.
+
+Setup (pip)
+-----------
+
+Install the package:
+
+::
+
+    pip install mhcflurry
+
+Then download our datasets and trained models:
+
+::
+
+    mhcflurry-downloads fetch
+
+From a checkout you can run the unit tests with:
+
+::
+
+    nosetests .
+
+The MHCflurry predictors are implemented in Python using
+`keras <https://keras.io>`__.
+
+MHCflurry works with both the tensorflow and theano keras backends. The
+tensorflow backend gives faster model-loading time but is undergoing
+more rapid development and sometimes hits issues. If you encounter
+tensorflow errors running MHCflurry, try setting this environment
+variable to switch to the theano backend:
+
+::
+
+    export KERAS_BACKEND=theano
+
+You may also need to ``pip install theano``.
+
+Setup (conda)
+-------------
+
+You can alternatively get up and running with a
+`conda <https://conda.io/docs/>`__ environment as follows:
+
+::
+
+    conda create -q -n mhcflurry-env python=3.6 'tensorflow>=1.1.0'
+    source activate mhcflurry-env
+
+Then continue as above:
+
+::
+
+    pip install mhcflurry
+    mhcflurry-downloads fetch
+
+If you wish to test your installation, you can install ``nose`` and run
+the tests from a checkout:
+
+::
+
+    pip install nose
+    nosetests .
+
+Making predictions from the command-line
+----------------------------------------
+
+.. code:: shell
+
+    $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ
+    allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high
+    HLA-A0201,SIINFEKL,5326.541919062165,3757.86675352994,7461.37693353508
+    HLA-A0201,SIINFEKD,18763.70298522213,13140.82000240037,23269.82139560844
+    HLA-A0201,SIINFEKQ,18620.10057358322,13096.425874678192,23223.148184869413
+    HLA-A0301,SIINFEKL,24481.726678691946,21035.52779725433,27245.371837497867
+    HLA-A0301,SIINFEKD,24687.529360239587,21582.590014592537,27749.39869616437
+    HLA-A0301,SIINFEKQ,25923.062203902562,23522.5793450799,28079.456657427705
+
+The predictions returned are affinities (KD) in nM. The
+``prediction_low`` and ``prediction_high`` fields give the 5-95
+percentile predictions across the models in the ensemble. The
+predictions above were generated with MHCflurry 0.9.2. Your exact
+predictions may vary slightly from these (up to about 1 nM) depending on
+the Keras backend in use and other numerical details. Different versions
+of MHCflurry can of course give results considerably different from
+these.
+
+You can also specify the input and output as CSV files. Run
+``mhcflurry-predict -h`` for details.
+
+Making predictions from Python
+------------------------------
+
+.. code:: python
+
+    >>> from mhcflurry import Class1AffinityPredictor
+    >>> predictor = Class1AffinityPredictor.load()
+    >>> predictor.predict_to_dataframe(peptides=['SIINFEKL'], allele='A0201')
+
+
+      allele   peptide   prediction  prediction_low  prediction_high
+      A0201  SIINFEKL  6029.084473     4474.103253      7771.297702
+
+See the
+`class1_allele_specific_models.ipynb <https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb>`__
+notebook for an overview of the Python API, including fitting your own
+predictors.
+
+Scanning protein sequences for predicted epitopes
+-------------------------------------------------
+
+The `mhctools <https://github.com/hammerlab/mhctools>`__ package
+provides support for scanning protein sequences to find predicted
+epitopes. It supports MHCflurry as well as other binding predictors.
+Here is an example:
+
+::
+
+    # First install mhctools if needed:
+    pip install mhctools
+
+    # Now generate predictions for protein sequences in FASTA format:
+    mhctools \
+        --mhc-predictor mhcflurry \
+        --input-fasta-file INPUT.fasta \
+        --mhc-alleles A02:01,A03:01 \
+        --mhc-peptide-lengths 8,9,10,11 \
+        --extract-subsequences \
+        --out RESULT.csv
+
+Details on the downloadable models
+----------------------------------
+
+Environment variables
+---------------------
+
+The path where MHCflurry looks for model weights and data can be set
+with the ``MHCFLURRY_DOWNLOADS_DIR`` environment variable. This
+directory should contain subdirectories like "models_class1".
+
+.. |Build Status| image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master
+   :target: https://travis-ci.org/hammerlab/mhcflurry
+.. |Coverage Status| image:: https://coveralls.io/repos/github/hammerlab/mhcflurry/badge.svg?branch=master
+   :target: https://coveralls.io/github/hammerlab/mhcflurry?branch=master
--- a/docs/package_readme/readme_header.rst
+++ b/docs/package_readme/readme_header.rst
+:orphan:
+
 .. image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master
    :target: https://travis-ci.org/hammerlab/mhcflurry


--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -2,3 +2,5 @@ sphinx-autorun
 sphinxcontrib-programoutput
 sphinx
 numpydoc
+pypandoc
+mhctools
--- a/setup.py
+++ b/setup.py
@@ -24,7 +24,7 @@ from setuptools import setup
 PY2 = (sys.version_info.major == 2)

 readme_dir = os.path.dirname(__file__)
-readme_filename = os.path.join(readme_dir, 'README.md')
+readme_filename = os.path.join(readme_dir, 'README.rst')

 try:
    with open(readme_filename, 'r') as f:
@@ -33,13 +33,6 @@ except:
    logging.warning("Failed to load %s" % readme_filename)
    readme = ""

-try:
-    import pypandoc
-    readme = pypandoc.convert(readme, to='rst', format='md')
-except:
-    logging.warning("Conversion of long_description from MD to RST failed")
-    pass
-
 with open('mhcflurry/__init__.py', 'r') as f:
    version = re.search(
        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',