From b8e76a1fde6c57385198120380212e1ef950a9a3 Mon Sep 17 00:00:00 2001 From: Tim O'Donnell <timodonnell@gmail.com> Date: Sun, 13 Oct 2019 14:59:33 -0400 Subject: [PATCH] fix --- .../data_mass_spec_benchmark/GENERATE.sh | 2 +- .../data_mass_spec_benchmark/run_predictors.py | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh index fbb1db71..b798aa58 100755 --- a/downloads-generation/data_mass_spec_benchmark/GENERATE.sh +++ b/downloads-generation/data_mass_spec_benchmark/GENERATE.sh @@ -137,7 +137,7 @@ do proteome_peptides.$subset.csv.bz2 \ --result-dtype "float16" \ --predictor netmhcpan4-$kind \ - --chunk-size 10000 \ + --chunk-size 5000 \ --allele $(cat alleles.txt) \ --out "$OUT_DIR" \ --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ diff --git a/downloads-generation/data_mass_spec_benchmark/run_predictors.py b/downloads-generation/data_mass_spec_benchmark/run_predictors.py index af6a7868..482ed8ab 100644 --- a/downloads-generation/data_mass_spec_benchmark/run_predictors.py +++ b/downloads-generation/data_mass_spec_benchmark/run_predictors.py @@ -311,8 +311,6 @@ def run(argv=sys.argv[1:]): for allele in alleles: allele_to_chunk_index_to_predictions[allele] = {} - last_write_time_per_column = dict((col, 0.0) for col in result_df.columns) - def write_col(col): out_path = os.path.join( args.out, col_to_filename[col]) @@ -322,6 +320,13 @@ def run(argv=sys.argv[1:]): result_df[col].isnull().mean() * 100.0), out_path) + print("Writing all columns.") + last_write_time_per_column = {} + for col in result_df.columns: + write_col(col) + last_write_time_per_column[col] = time.time() + print("Done writing all columns. Reading results.") + for worker_results in tqdm.tqdm(results, total=len(work_items)): for (work_item_num, col_to_predictions) in worker_results: for (col, predictions) in col_to_predictions.items(): -- GitLab