Commit cd8e0e82 authored by Tim O'Donnell

fix

parent 42592f05
@@ -68,7 +68,7 @@
 W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 0
 Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 0
 V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 0
 X 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
-"""), sep='\s+').loc[AMINO_ACIDS, AMINO_ACIDS]
+"""), sep='\s+').loc[AMINO_ACIDS, AMINO_ACIDS].astype("int8")
 assert (BLOSUM62_MATRIX == BLOSUM62_MATRIX.T).all().all()
 ENCODING_DATA_FRAMES = {
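The `.astype("int8")` cast is safe because BLOSUM62 scores range from -4 to 11, comfortably inside int8's -128..127, and it cuts each cell from pandas' default 8 bytes (int64) to 1. A minimal standalone sketch of the effect; the toy three-letter matrix and variable names here are illustrative, not mhcflurry's:

    import io
    import pandas as pd

    # Toy whitespace-separated substitution matrix in the same layout:
    # row labels in the first column, column labels in the header row.
    TEXT = """
       A  C  D
    A  4  0 -2
    C  0  9 -3
    D -2 -3  6
    """

    m64 = pd.read_csv(io.StringIO(TEXT), sep=r"\s+")  # parses as int64
    m8 = m64.astype("int8")                           # 1 byte per cell

    assert (m64.values == m8.values).all()            # values unchanged
    print(m64.memory_usage().sum(), "->", m8.memory_usage().sum())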
...
@@ -120,25 +120,27 @@ def run(argv=sys.argv[1:]):
     start = time.time()
-    print("Percent rank calibration for %d alleles. Encoding peptides." % (
+    print("Percent rank calibration for %d alleles. Generating peptides." % (
         len(alleles)))
     peptides = []
     lengths = range(args.length_range[0], args.length_range[1] + 1)
     for length in lengths:
         peptides.extend(
             random_peptides(
                 args.num_peptides_per_length, length, distribution=distribution))
+    print("Done generating peptides in %0.2f sec." % (time.time() - start))
+    print("Encoding %d peptides." % len(peptides))
+    start = time.time()
     encoded_peptides = EncodableSequences.create(peptides)
+    del peptides

     # Now we encode the peptides for each neural network, so the encoding
     # becomes cached.
     for network in predictor.neural_networks:
         network.peptides_to_network_input(encoded_peptides)
     assert encoded_peptides.encoding_cache  # must have cached the encoding
-    print("Finished encoding peptides for percent ranks in %0.2f sec." % (
-        time.time() - start))
-    print("Calibrating percent rank calibration for %d alleles." % len(alleles))
+    print("Finished encoding peptides in %0.2f sec." % (time.time() - start))

     # Store peptides in global variable so they are in shared memory
     # after fork, instead of needing to be pickled (when doing a parallel run).
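The trailing comment describes the copy-on-write trick used below: large objects go into the module-level GLOBAL_DATA dict before workers are forked, so children inherit them through shared memory pages instead of each receiving a pickled copy over a pipe (and the earlier `del peptides` drops the raw string list once the encoded form exists). A minimal sketch of the idea, assuming a fork start method (the Linux default); `BIG_DATA` and `worker` are illustrative names, not mhcflurry's API:

    import multiprocessing

    # Module-level global, filled in before the pool is created. With the
    # "fork" start method each worker inherits it via copy-on-write, so the
    # large payload is never pickled per task.
    BIG_DATA = {}

    def worker(key):
        return len(BIG_DATA[key])  # reads the inherited global

    if __name__ == "__main__":
        BIG_DATA["peptides"] = ["SIINFEKL"] * 1_000_000  # stand-in payload
        ctx = multiprocessing.get_context("fork")
        with ctx.Pool(processes=2) as pool:
            print(pool.map(worker, ["peptides"] * 4))

On spawn-based platforms (e.g. Windows) globals assigned at runtime are not inherited by children, so this is a platform-dependent optimization.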
@@ -149,6 +151,7 @@ def run(argv=sys.argv[1:]):
         'summary_top_peptide_fractions': args.summary_top_peptide_fraction,
         'verbose': args.verbosity > 0
     }
+    del encoded_peptides

     serial_run = not args.cluster_parallelism and args.num_jobs == 0
     worker_pool = None
@@ -167,7 +170,8 @@ def run(argv=sys.argv[1:]):
             work_function=do_calibrate_percentile_ranks,
             work_items=work_items,
             constant_data=GLOBAL_DATA,
-            result_serialization_method="pickle")
+            result_serialization_method="pickle",
+            clear_constant_data=True)
     else:
         worker_pool = worker_pool_with_gpu_assignments_from_args(args)
         print("Worker pool", worker_pool)
...
@@ -43,7 +43,8 @@ def cluster_results_from_args(
         work_function,
         work_items,
         constant_data=None,
-        result_serialization_method="pickle"):
+        result_serialization_method="pickle",
+        clear_constant_data=False):
     return cluster_results(
         work_function=work_function,
         work_items=work_items,
@@ -51,7 +52,8 @@ def cluster_results_from_args(
         submit_command=args.cluster_submit_command,
         results_workdir=args.cluster_results_workdir,
         script_prefix_path=args.cluster_script_prefix_path,
-        result_serialization_method=result_serialization_method
+        result_serialization_method=result_serialization_method,
+        clear_constant_data=clear_constant_data
     )
@@ -63,7 +65,8 @@ def cluster_results(
         results_workdir="./cluster-workdir",
         script_prefix_path=None,
         result_serialization_method="pickle",
-        max_retries=3):
+        max_retries=3,
+        clear_constant_data=False):

     constant_payload = {
         'constant_data': constant_data,
@@ -78,6 +81,9 @@ def cluster_results(
     with open(constant_payload_path, "wb") as fd:
         pickle.dump(constant_payload, fd, protocol=pickle.HIGHEST_PROTOCOL)
     print("Wrote:", constant_payload_path)
+    if clear_constant_data:
+        constant_data.clear()
+        print("Cleared constant data to free up memory.")

     if script_prefix_path:
         with open(script_prefix_path) as fd:
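The new `clear_constant_data` flag implements a pickle-then-clear pattern: once the constant payload has been written to disk for the cluster workers, the submitting process no longer needs its in-memory copy, and `dict.clear()` drops the references so the large values become garbage-collectable. Note that this empties the caller's dict in place, which is presumably why it is opt-in. A minimal sketch under those assumptions; `write_payload_and_free` is a hypothetical helper, not mhcflurry's:

    import os
    import pickle
    import tempfile

    def write_payload_and_free(constant_data, clear=True):
        """Persist constant_data for workers, then release our copy."""
        fd, path = tempfile.mkstemp(suffix=".pkl")
        with os.fdopen(fd, "wb") as f:
            pickle.dump(
                {"constant_data": constant_data},
                f,
                protocol=pickle.HIGHEST_PROTOCOL)
        if clear:
            # In-place mutation: the caller's dict is emptied too.
            constant_data.clear()
        return path

    data = {"encoded_peptides": list(range(10**6))}
    path = write_payload_and_free(data)
    assert data == {}  # memory for the big list can now be reclaimed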
...
@@ -73,6 +73,6 @@ def test_run_cluster_parallelism(delete=True):

 if __name__ == "__main__":
-    run_and_check(n_jobs=0, delete=False)
+    # run_and_check(n_jobs=0, delete=False)
     # run_and_check(n_jobs=2, delete=False)
-    # test_run_cluster_parallelism(delete=False)
+    test_run_cluster_parallelism(delete=False)