Skip to content
Snippets Groups Projects
Commit 716e2251 authored by Tim O'Donnell
Browse files

test

parent af32a1b5
No related branches found
No related tags found
No related merge requests found
......@@ -97,7 +97,8 @@ else
--hits "$(mhcflurry-downloads path data_mass_spec_annotated)/annotated_ms.csv.bz2" \
--expression "$(mhcflurry-downloads path data_curated)/rna_expression.csv.bz2" \
--decoys-per-hit 1 \
--out train.multiallelic.csv
--out train.multiallelic.csv \
--alleles "HLA-A*02:01" "HLA-B*27:01" "HLA-C*07:01" "HLA-A*03:01" "HLA-B*15:01" "HLA-C*01:02"
fi
ALLELE_LIST=$(bzcat "$MONOALLELIC_TRAIN" | cut -f 1 -d , | grep -v allele | uniq | sort | uniq)
......
......@@ -39,12 +39,17 @@ parser.add_argument(
metavar="CSV",
required=True,
help="File to write")
# Optional allele whitelist. nargs="+" collects one or more values into a list;
# when the flag is omitted argparse leaves args.alleles as None, which run()
# treats as "do not filter by allele".
parser.add_argument(
"--alleles",
nargs="+",
help="Include only the specified alleles")
def run():
args = parser.parse_args(sys.argv[1:])
hit_df = pandas.read_csv(args.hits)
expression_df = pandas.read_csv(args.expression, index_col=0).fillna(0)
hit_df["alleles"] = hit_df.hla.str.split()
hit_df = hit_df.loc[
(hit_df.mhc_class == "I") &
(hit_df.peptide.str.len() <= 15) &
......@@ -64,8 +69,20 @@ def run():
"to",
len(new_hit_df))
hit_df = new_hit_df.copy()
hit_df["alleles"] = hit_df.hla.str.split()
# Optionally keep only the hits whose sample carries at least one of the
# alleles requested on the command line via --alleles.
if args.alleles:
    filter_alleles = set(args.alleles)
    # Each entry of hit_df.alleles is the list obtained by splitting the
    # row's "hla" string, so the overlap test has to be applied per row with
    # Series.map. (Series.isin is a method; it has no .map attribute, so
    # chaining .isin.map raised AttributeError.)
    new_hit_df = hit_df.loc[
        hit_df.alleles.map(
            lambda a: not filter_alleles.isdisjoint(a))
    ]
    print(
        "Selecting alleles",
        args.alleles,
        "reduced dataset from",
        len(hit_df),
        "to",
        len(new_hit_df))
    # Copy so later column assignments do not operate on a view of the
    # unfiltered frame.
    hit_df = new_hit_df.copy()
sample_table = hit_df.drop_duplicates("sample_id").set_index("sample_id")
grouped = hit_df.groupby("sample_id").nunique()
......
......@@ -30,7 +30,7 @@ releases:
default: false
- name: models_class1_pan_refined
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_refined.20191212b.tar.bz2
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/models_class1_pan_refined.20191212c.tar.bz2
default: false
- name: models_class1_pan_variants
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment