Skip to content
Snippets Groups Projects
Commit f32b1aa5 authored by Tim O'Donnell
Browse files

fix

parent 48f28d4b
No related branches found
No related tags found
No related merge requests found
...@@ -98,7 +98,6 @@ else ...@@ -98,7 +98,6 @@ else
--expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \ --expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \
--decoys-per-hit 1 \ --decoys-per-hit 1 \
--out train.multiallelic.csv \ --out train.multiallelic.csv \
--alleles "HLA-A*02:01" "HLA-B*27:01" "HLA-C*07:01" "HLA-A*03:01" "HLA-B*15:01" "HLA-C*01:02"
fi fi
ALLELE_LIST=$(bzcat "$MONOALLELIC_TRAIN" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq) ALLELE_LIST=$(bzcat "$MONOALLELIC_TRAIN" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq)
...@@ -113,6 +112,7 @@ time mhcflurry-multiallelic-refinement \ ...@@ -113,6 +112,7 @@ time mhcflurry-multiallelic-refinement \
--out-affinity-predictor-dir $(pwd)/models.affinity \ --out-affinity-predictor-dir $(pwd)/models.affinity \
--out-presentation-predictor-dir $(pwd)/models.presentation \ --out-presentation-predictor-dir $(pwd)/models.presentation \
--worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \ --worker-log-dir "$SCRATCH_DIR/$DOWNLOAD_NAME" \
--only-alleles-with-mass-spec \
$PARALLELISM_ARGS $PARALLELISM_ARGS
time mhcflurry-calibrate-percentile-ranks \ time mhcflurry-calibrate-percentile-ranks \
......
...@@ -72,6 +72,10 @@ parser.add_argument( ...@@ -72,6 +72,10 @@ parser.add_argument(
"--max-models", "--max-models",
type=int, type=int,
default=None) default=None)
# Boolean switch: when present, run() keeps only the monoallelic rows whose
# allele also appears in the multiallelic data. Declared as store_true
# because callers pass the flag bare (no value) and the code only tests its
# truthiness; the previous type=int declaration made argparse demand an
# integer argument after the flag.
parser.add_argument(
    "--only-alleles-with-mass-spec",
    action="store_true",
    default=False)
parser.add_argument( parser.add_argument(
"--verbosity", "--verbosity",
type=int, type=int,
...@@ -106,6 +110,23 @@ def run(argv=sys.argv[1:]): ...@@ -106,6 +110,23 @@ def run(argv=sys.argv[1:]):
monoallelic_df = pandas.read_csv(args.monoallelic_data) monoallelic_df = pandas.read_csv(args.monoallelic_data)
print("Loaded monoallelic data: %s" % (str(monoallelic_df.shape))) print("Loaded monoallelic data: %s" % (str(monoallelic_df.shape)))
if args.only_alleles_with_mass_spec:
multiallelic_alleles = set()
for hla in multiallelic_df.hla.unique():
multiallelic_alleles.update(hla.split())
print(
"Multiallelic alleles (%d)" % len(multiallelic_alleles),
multiallelic_alleles)
new_monoallelic_df = monoallelic_df.loc[
monoallelic_df.allele.isin((multiallelic_alleles))
].copy()
print(
"Allele selection reduced monoallelic data from",
len(monoallelic_df),
"to",
len(new_monoallelic_df))
monoallelic_df = new_monoallelic_df
input_predictor = Class1AffinityPredictor.load( input_predictor = Class1AffinityPredictor.load(
args.models_dir, optimization_level=0, max_models=args.max_models) args.models_dir, optimization_level=0, max_models=args.max_models)
print("Loaded: %s" % input_predictor) print("Loaded: %s" % input_predictor)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment