diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh index 132d78d3ad4040c16f104c10d9a9ab1cc70d883e..bfcb1d14a616015b049d52d6d518c7d5ac93e5eb 100755 --- a/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh +++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.WITH_HPC_CLUSTER.sh @@ -15,8 +15,8 @@ rm -rf "$SCRATCH_DIR/$DOWNLOAD_NAME" mkdir "$SCRATCH_DIR/$DOWNLOAD_NAME" # Send stdout and stderr to a logfile included with the archive. -#exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt") -#exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2) +exec > >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt") +exec 2> >(tee -ia "$SCRATCH_DIR/$DOWNLOAD_NAME/LOG.txt" >&2) # Log some environment info date @@ -29,17 +29,15 @@ cp $SCRIPT_DIR/write_proteome_peptides.py . cp $SCRIPT_DIR/run_mhcflurry.py . cp $SCRIPT_DIR/write_allele_list.py . - PEPTIDES=$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2 REFERENCES_DIR=$(mhcflurry-downloads path data_references) -#python write_proteome_peptides.py \ -# "$PEPTIDES" \ -# "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \ -# --out proteome_peptides.csv -#ls -lh proteome_peptides.csv -#bzip2 proteome_peptides.csv -ln -s ~/Dropbox/sinai/projects/201808-mhcflurry-pan/20190622-models/proteome_peptides.csv.bz2 proteome_peptides.csv.bz2 +python write_proteome_peptides.py \ + "$PEPTIDES" \ + "${REFERENCES_DIR}/uniprot_proteins.csv.bz2" \ + --out proteome_peptides.csv +ls -lh proteome_peptides.csv +bzip2 proteome_peptides.csv python write_allele_list.py "$PEPTIDES" --out alleles.txt diff --git a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py index 435a6046ff96a115974f0065d8860d2464c04e86..84fb4fe568fe39053dccb6f518fa715004666ad7 100644 --- a/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py +++ b/downloads-generation/data_mass_spec_benchmark/run_mhcflurry.py @@ -55,7 +55,7 @@ parser.add_argument( parser.add_argument( "--chunk-size", type=int, - default=1000000, + default=100000000, help="Num peptides per job. Default: %(default)s") parser.add_argument( "--batch-size", diff --git a/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py b/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py index bc9c72280de549e675d2a93fa1234d365f41259c..e18daf9ee5d2dd70779e1aed166344c2cc9aa543 100644 --- a/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py +++ b/downloads-generation/data_mass_spec_benchmark/write_proteome_peptides.py @@ -12,8 +12,6 @@ import pandas import tqdm # progress bar tqdm.monitor_interval = 0 # see https://github.com/tqdm/tqdm/issues/481 -import shellinford - parser = argparse.ArgumentParser(usage=__doc__)