diff --git a/docs/Makefile b/docs/Makefile index ae700144355702f793ea79faafea43b2847ffa99..72c597ba4e4c373485ee1a41a88b4265dc419525 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -63,15 +63,11 @@ generate: # Added by Tim: .PHONY: readme readme: text + rm -f package_readme/readme.generated.rst cat package_readme/readme_header.rst \ _build/text/package_readme/readme.template.txt \ > package_readme/readme.generated.rst - #pandoc -B package_readme/readme_header.rst \ - # -f rst \ - # -t rst \ - # --base-header-level 2 \ - # _build/text/package_readme/readme.template.txt \ - # -o package_readme/readme.generated.rst + chmod 444 package_readme/readme.generated.rst # read only .PHONY: clean clean: diff --git a/docs/commandline_tutorial.rst b/docs/commandline_tutorial.rst index 684438568b4855a05358d9e63794aac7a30341e1..ab5fdaff067e1f98cd3f42bb5bdcf455ee464b9b 100644 --- a/docs/commandline_tutorial.rst +++ b/docs/commandline_tutorial.rst @@ -27,6 +27,7 @@ be customized with the ``--models`` argument. See ``mhcflurry-predict -h`` for details. .. command-output:: mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ + :nostderr: The predictions returned are affinities (KD) in nM. The ``prediction_low`` and ``prediction_high`` fields give the 5-95 percentile predictions across @@ -43,3 +44,39 @@ You can also specify the input and output as CSV files. Run Fitting your own models ----------------------- +Scanning protein sequences for predicted epitopes +------------------------------------------------- + +The `mhctools <https://github.com/hammerlab/mhctools>`__ package +provides support for scanning protein sequences to find predicted +epitopes. It supports MHCflurry as well as other binding predictors. +Here is an example. + +First, install ``mhctools`` if it is not already installed: + +.. code:: shell + + $ pip install mhctools + +We'll generate predictions across ``example.fasta``, a FASTA file with two short +sequences: + +.. 
literalinclude:: /example.fasta + +Here's the ``mhctools`` invocation. See ``mhctools -h`` for more information. + +.. command-output:: + mhctools + --mhc-predictor mhcflurry + --input-fasta-file example.fasta + --mhc-alleles A02:01,A03:01 + --mhc-peptide-lengths 8,9,10,11 + --extract-subsequences + --output-csv /tmp/result.csv + :ellipsis: 2,-2 + :nostderr: + +This will write a file giving predictions for all subsequences of the specified lengths: + +.. command-output:: + head -n 3 /tmp/result.csv diff --git a/docs/conf.py b/docs/conf.py index c3580079eb2cb23c110a407235930cb6035945ea..ffd7f08126b8983789f30d39d4cf890b71328c46 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -80,6 +80,9 @@ release = version # Added by tim autodoc_member_order = 'bysource' +# Added by tim +suppress_warnings = ['image.nonlocal_uri'] + # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # diff --git a/docs/example.fasta b/docs/example.fasta new file mode 100644 index 0000000000000000000000000000000000000000..ea095115509e108927979079136d5ff5b358d864 --- /dev/null +++ b/docs/example.fasta @@ -0,0 +1,6 @@ +>protein1 +MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQV +EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHH +>protein2 +VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGA +ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC diff --git a/docs/generate.py b/docs/generate.py index 900014b663658bbbfd310c764723c18886006fe6..edce8f1e835bf41796ba83d71de7ebf4c52cf647 100644 --- a/docs/generate.py +++ b/docs/generate.py @@ -1,5 +1,5 @@ """ -Generate models report. +Generate certain RST files used in documentation. """ import sys diff --git a/docs/package_readme/readme.generated.rst b/docs/package_readme/readme.generated.rst index 1d4381cdff3cb4906497a920f94cdef4b180b00f..d590e1dae327ab01778a636dffeb9bbc5cdb38be 100644 --- a/docs/package_readme/readme.generated.rst +++ b/docs/package_readme/readme.generated.rst @@ -1,3 +1,5 @@ +:orphan: + .. 
image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master :target: https://travis-ci.org/hammerlab/mhcflurry @@ -15,7 +17,7 @@ open source implementation. You can download pre-trained MHCflurry models fit to affinity measurements deposited in IEDB. See the -"downloads_generation/models_class1" directory in the repository for +“downloads_generation/models_class1” directory in the repository for the workflow used to train these predictors. Users with their own data can also fit their own MHCflurry models. @@ -30,7 +32,7 @@ GPUs may optionally be used for a generally modest speed improvement. If you find MHCflurry useful in your research please cite: - O'Donnell, T. et al., 2017. MHCflurry: open-source class I MHC + O’Donnell, T. et al., 2017. MHCflurry: open-source class I MHC binding affinity prediction. bioRxiv. Available at: http://www.biorxiv.org/content/early/2017/08/09/174243. @@ -79,7 +81,7 @@ Most users will use pre-trained MHCflurry models that we release. These models are distributed separately from the source code and may be downloaded with the following command: -We also release other "downloads," such as curated training data and +We also release other “downloads,” such as curated training data and some experimental models. To see what you have downloaded, run: @@ -92,8 +94,6 @@ downloaded above but this can be customized with the "--models" argument. See "mhcflurry-predict -h" for details. $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ - 2017-12-21 13:15:45.075649: I tensorflow/core/platform/cpu_feature_guard.cc:137] Your CPU supports instructions that this TensorFlow binary was not compiled to use: SSE4.1 SSE4.2 AVX AVX2 FMA - Using TensorFlow backend. 
allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high,mhcflurry_prediction_percentile HLA-A0201,SIINFEKL,4899.047843425702,2767.7636539507857,7269.683642935029,6.509787499999997 HLA-A0201,SIINFEKD,21050.420242970613,16834.65859138968,24129.046091695887,34.297175 @@ -120,6 +120,52 @@ Fitting your own models *********************** +Scanning protein sequences for predicted epitopes +************************************************* + +The mhctools package provides support for scanning protein sequences +to find predicted epitopes. It supports MHCflurry as well as other +binding predictors. Here is an example. + +First, install "mhctools" if it is not already installed: + + $ pip install mhctools + +We’ll generate predictions across "example.fasta", a FASTA file with +two short sequences: + + >protein1 + MDSKGSSQKGSRLLLLLVVSNLLLCQGVVSTPVCPNGPGNCQV + EMFNEFDKRYAQGKGFITMALNSCHTSSLPTPEDKEQAQQTHH + >protein2 + VTEVRGMKGAPDAILSRAIEIEEENKRLLEGMEMIFGQVIPGA + ARYSAFYNLLHCLRRDSSKIDTYLKLLNCRIIYNNNC + +Here’s the "mhctools" invocation. See "mhctools -h" for more +information. + + $ mhctools + --mhc-predictor mhcflurry + --input-fasta-file example.fasta + --mhc-alleles A02:01,A03:01 + --mhc-peptide-lengths 8,9,10,11 + --extract-subsequences + --output-csv /tmp/result.csv + 2017-12-21 14:13:47,847 - mhctools.cli.args - INFO - Building MHC binding prediction type for alleles ['HLA-A*02:01', 'HLA-A*03:01'] and epitope lengths [8, 9, 10, 11] + 2017-12-21 14:13:52,753 - mhctools.cli.script - INFO - + ... 
+ [1192 rows x 8 columns] + Wrote: /tmp/result.csv + +This will write a file giving predictions for all subsequences of the +specified lengths: + + $ head -n 3 /tmp/result.csv + source_sequence_name,offset,peptide,allele,affinity,percentile_rank,prediction_method_name,length + protein2,42,AARYSAFY,HLA-A*02:01,33829.639361000336,73.7865875,mhcflurry,8 + protein2,42,AARYSAFYN,HLA-A*02:01,29747.41688667342,60.34871249999998,mhcflurry,9 + + Library usage ============= @@ -326,3 +372,153 @@ peptides of length 8-15 and the following 124 alleles: Mamu-B*17:04, Mamu-B*39:01, Mamu-B*52:01, Mamu-B*66:01, Mamu-B*83:01, Mamu-B*87:01, Patr-A*01:01, Patr-A*03:01, Patr-A*04:01, Patr-A*07:01, Patr-A*09:01, Patr-B*01:01, Patr-B*13:01, Patr-B*24:01 + +[image: Build Status][image] [image: Coverage Status][image] + + +mhcflurry +========= + +Open source neural network models for peptide-MHC binding affinity +prediction + +The adaptive immune system depends on the presentation of protein +fragments by MHC molecules. Machine learning models of this +interaction are used in studies of infectious diseases, autoimmune +diseases, vaccine development, and cancer immunotherapy. + +MHCflurry supports Class I peptide/MHC binding affinity prediction +using ensembles of allele-specific models. You can fit MHCflurry +models to your own data or download models that we fit to data from +IEDB and Kim 2014. Our combined dataset is available for download +here. + +Pan-allelic prediction is supported in principle but is not yet +performing accurately. Infrastructure for modeling other aspects of +antigen processing is also implemented but experimental. + +If you find MHCflurry useful in your research please cite: + + O’Donnell, T. et al., 2017. MHCflurry: open-source class I MHC + binding affinity prediction. bioRxiv. Available at: + http://www.biorxiv.org/content/early/2017/08/09/174243. 
+ + +Setup (pip) +*********** + +Install the package: + + pip install mhcflurry + +Then download our datasets and trained models: + + mhcflurry-downloads fetch + +From a checkout you can run the unit tests with: + + nosetests . + +The MHCflurry predictors are implemented in Python using keras. + +MHCflurry works with both the tensorflow and theano keras backends. +The tensorflow backend gives faster model-loading time but is +undergoing more rapid development and sometimes hits issues. If you +encounter tensorflow errors running MHCflurry, try setting this +environment variable to switch to the theano backend: + + export KERAS_BACKEND=theano + +You may also need to "pip install theano". + + +Setup (conda) +************* + +You can alternatively get up and running with a conda environment as +follows: + + conda create -q -n mhcflurry-env python=3.6 'tensorflow>=1.1.0' + source activate mhcflurry-env + +Then continue as above: + + pip install mhcflurry + mhcflurry-downloads fetch + +If you wish to test your installation, you can install "nose" and run +the tests from a checkout: + + pip install nose + nosetests . + + +Making predictions from the command-line +**************************************** + + $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ + allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high + HLA-A0201,SIINFEKL,5326.541919062165,3757.86675352994,7461.37693353508 + HLA-A0201,SIINFEKD,18763.70298522213,13140.82000240037,23269.82139560844 + HLA-A0201,SIINFEKQ,18620.10057358322,13096.425874678192,23223.148184869413 + HLA-A0301,SIINFEKL,24481.726678691946,21035.52779725433,27245.371837497867 + HLA-A0301,SIINFEKD,24687.529360239587,21582.590014592537,27749.39869616437 + HLA-A0301,SIINFEKQ,25923.062203902562,23522.5793450799,28079.456657427705 + +The predictions returned are affinities (KD) in nM. 
The +"prediction_low" and "prediction_high" fields give the 5-95 percentile +predictions across the models in the ensemble. The predictions above +were generated with MHCflurry 0.9.2. Your exact predictions may vary +slightly from these (up to about 1 nM) depending on the Keras backend +in use and other numerical details. Different versions of MHCflurry +can of course give results considerably different from these. + +You can also specify the input and output as CSV files. Run +"mhcflurry-predict -h" for details. + + +Making predictions from Python +****************************** + + >>> from mhcflurry import Class1AffinityPredictor + >>> predictor = Class1AffinityPredictor.load() + >>> predictor.predict_to_dataframe(peptides=['SIINFEKL'], allele='A0201') + + + allele peptide prediction prediction_low prediction_high + A0201 SIINFEKL 6029.084473 4474.103253 7771.297702 + +See the class1_allele_specific_models.ipynb notebook for an overview +of the Python API, including fitting your own predictors. + + +Scanning protein sequences for predicted epitopes +************************************************* + +The mhctools package provides support for scanning protein sequences +to find predicted epitopes. It supports MHCflurry as well as other +binding predictors. Here is an example: + + # First install mhctools if needed: + pip install mhctools + + # Now generate predictions for protein sequences in FASTA format: + mhctools \ + --mhc-predictor mhcflurry \ + --input-fasta-file INPUT.fasta \ + --mhc-alleles A02:01,A03:01 \ + --mhc-peptide-lengths 8,9,10,11 \ + --extract-subsequences \ + --out RESULT.csv + + +Details on the downloadable models +********************************** + + +Environment variables +********************* + +The path where MHCflurry looks for model weights and data can be set +with the "MHCFLURRY_DOWNLOADS_DIR" environment variable. This +directory should contain subdirectories like “models_class1”. 
diff --git a/docs/package_readme/readme.template.rst b/docs/package_readme/readme.template.rst index d96af450129dbcd3255d956bc4ae7999b7c04d31..a3d7a410d42a08926f9a64ec3e690dc87f919501 100644 --- a/docs/package_readme/readme.template.rst +++ b/docs/package_readme/readme.template.rst @@ -1,3 +1,4 @@ +:orphan: .. include:: /intro.rst :start-line: 3 @@ -7,3 +8,179 @@ .. include:: /python_tutorial.rst .. include:: /models_supported_alleles.rst +|Build Status| |Coverage Status| + +mhcflurry +========= + +Open source neural network models for peptide-MHC binding affinity +prediction + +The `adaptive immune +system <https://en.wikipedia.org/wiki/Adaptive_immune_system>`__ depends +on the presentation of protein fragments by +`MHC <https://en.wikipedia.org/wiki/Major_histocompatibility_complex>`__ +molecules. Machine learning models of this interaction are used in +studies of infectious diseases, autoimmune diseases, vaccine +development, and cancer immunotherapy. + +MHCflurry supports Class I peptide/MHC binding affinity prediction using +ensembles of allele-specific models. You can fit MHCflurry models to +your own data or download models that we fit to data from +`IEDB <http://www.iedb.org/home_v3.php>`__ and `Kim +2014 <http://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-15-241>`__. +Our combined dataset is available for download +`here <https://github.com/hammerlab/mhcflurry/releases/download/pre-1.0.0-alpha/data_curated.tar.bz2>`__. + +Pan-allelic prediction is supported in principle but is not yet +performing accurately. Infrastructure for modeling other aspects of +antigen processing is also implemented but experimental. + +If you find MHCflurry useful in your research please cite: + + O'Donnell, T. et al., 2017. MHCflurry: open-source class I MHC + binding affinity prediction. bioRxiv. Available at: + http://www.biorxiv.org/content/early/2017/08/09/174243. 
+ +Setup (pip) +----------- + +Install the package: + +:: + + pip install mhcflurry + +Then download our datasets and trained models: + +:: + + mhcflurry-downloads fetch + +From a checkout you can run the unit tests with: + +:: + + nosetests . + +The MHCflurry predictors are implemented in Python using +`keras <https://keras.io>`__. + +MHCflurry works with both the tensorflow and theano keras backends. The +tensorflow backend gives faster model-loading time but is undergoing +more rapid development and sometimes hits issues. If you encounter +tensorflow errors running MHCflurry, try setting this environment +variable to switch to the theano backend: + +:: + + export KERAS_BACKEND=theano + +You may also need to ``pip install theano``. + +Setup (conda) +------------- + +You can alternatively get up and running with a +`conda <https://conda.io/docs/>`__ environment as follows: + +:: + + conda create -q -n mhcflurry-env python=3.6 'tensorflow>=1.1.0' + source activate mhcflurry-env + +Then continue as above: + +:: + + pip install mhcflurry + mhcflurry-downloads fetch + +If you wish to test your installation, you can install ``nose`` and run +the tests from a checkout: + +:: + + pip install nose + nosetests . + +Making predictions from the command-line +---------------------------------------- + +.. 
code:: shell + + $ mhcflurry-predict --alleles HLA-A0201 HLA-A0301 --peptides SIINFEKL SIINFEKD SIINFEKQ + allele,peptide,mhcflurry_prediction,mhcflurry_prediction_low,mhcflurry_prediction_high + HLA-A0201,SIINFEKL,5326.541919062165,3757.86675352994,7461.37693353508 + HLA-A0201,SIINFEKD,18763.70298522213,13140.82000240037,23269.82139560844 + HLA-A0201,SIINFEKQ,18620.10057358322,13096.425874678192,23223.148184869413 + HLA-A0301,SIINFEKL,24481.726678691946,21035.52779725433,27245.371837497867 + HLA-A0301,SIINFEKD,24687.529360239587,21582.590014592537,27749.39869616437 + HLA-A0301,SIINFEKQ,25923.062203902562,23522.5793450799,28079.456657427705 + +The predictions returned are affinities (KD) in nM. The +``prediction_low`` and ``prediction_high`` fields give the 5-95 +percentile predictions across the models in the ensemble. The +predictions above were generated with MHCflurry 0.9.2. Your exact +predictions may vary slightly from these (up to about 1 nM) depending on +the Keras backend in use and other numerical details. Different versions +of MHCflurry can of course give results considerably different from +these. + +You can also specify the input and output as CSV files. Run +``mhcflurry-predict -h`` for details. + +Making predictions from Python +------------------------------ + +.. code:: python + + >>> from mhcflurry import Class1AffinityPredictor + >>> predictor = Class1AffinityPredictor.load() + >>> predictor.predict_to_dataframe(peptides=['SIINFEKL'], allele='A0201') + + + allele peptide prediction prediction_low prediction_high + A0201 SIINFEKL 6029.084473 4474.103253 7771.297702 + +See the +`class1_allele_specific_models.ipynb <https://github.com/hammerlab/mhcflurry/blob/master/examples/class1_allele_specific_models.ipynb>`__ +notebook for an overview of the Python API, including fitting your own +predictors. 
+ +Scanning protein sequences for predicted epitopes +------------------------------------------------- + +The `mhctools <https://github.com/hammerlab/mhctools>`__ package +provides support for scanning protein sequences to find predicted +epitopes. It supports MHCflurry as well as other binding predictors. +Here is an example: + +:: + + # First install mhctools if needed: + pip install mhctools + + # Now generate predictions for protein sequences in FASTA format: + mhctools \ + --mhc-predictor mhcflurry \ + --input-fasta-file INPUT.fasta \ + --mhc-alleles A02:01,A03:01 \ + --mhc-peptide-lengths 8,9,10,11 \ + --extract-subsequences \ + --out RESULT.csv + +Details on the downloadable models +---------------------------------- + +Environment variables +--------------------- + +The path where MHCflurry looks for model weights and data can be set +with the ``MHCFLURRY_DOWNLOADS_DIR`` environment variable. This +directory should contain subdirectories like "models_class1". + +.. |Build Status| image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master + :target: https://travis-ci.org/hammerlab/mhcflurry +.. |Coverage Status| image:: https://coveralls.io/repos/github/hammerlab/mhcflurry/badge.svg?branch=master + :target: https://coveralls.io/github/hammerlab/mhcflurry?branch=master diff --git a/docs/package_readme/readme_header.rst b/docs/package_readme/readme_header.rst index d4cdf9d7824ac23b964ec4a2f4cad528ce124c97..dcffcad682c4fd067ad9f1c38e147ab3eb15c5e1 100644 --- a/docs/package_readme/readme_header.rst +++ b/docs/package_readme/readme_header.rst @@ -1,3 +1,5 @@ +:orphan: + .. 
image:: https://travis-ci.org/hammerlab/mhcflurry.svg?branch=master :target: https://travis-ci.org/hammerlab/mhcflurry diff --git a/docs/requirements.txt b/docs/requirements.txt index 27dde4233d48923904073b43a6fca6106d68df22..a37e65049c3623c6eabd946d0c482187a8828193 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -2,3 +2,5 @@ sphinx-autorun sphinxcontrib-programoutput sphinx numpydoc +pypandoc +mhctools diff --git a/setup.py b/setup.py index a66f0d8255cb82c5f978d1d019004797dd1f11b9..4d0dfb95489129d8910c4f8bf0baaf80cf491df1 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ from setuptools import setup PY2 = (sys.version_info.major == 2) readme_dir = os.path.dirname(__file__) -readme_filename = os.path.join(readme_dir, 'README.md') +readme_filename = os.path.join(readme_dir, 'README.rst') try: with open(readme_filename, 'r') as f: @@ -33,13 +33,6 @@ except: logging.warning("Failed to load %s" % readme_filename) readme = "" -try: - import pypandoc - readme = pypandoc.convert(readme, to='rst', format='md') -except: - logging.warning("Conversion of long_description from MD to RST failed") - pass - with open('mhcflurry/__init__.py', 'r') as f: version = re.search( r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',