Skip to content
Snippets Groups Projects
Commit 8a6b85f1 authored by Tim O'Donnell
Browse files

update

parent d3600b1f
No related merge requests found
......@@ -81,6 +81,11 @@ time python make_allele_sequences.py \
--differentiate-alleles training_data.alleles.txt \
--out-csv allele_sequences.csv
# Second make_allele_sequences.py run on the aligned FASTA: no
# --differentiate-alleles here; it passes --recapitulate-sequences with
# class1_pseudosequences.csv and writes allele_sequences.no_differentiation.csv.
time python make_allele_sequences.py \
class1.aligned.fasta \
--recapitulate-sequences class1_pseudosequences.csv \
--out-csv allele_sequences.no_differentiation.csv
# Cleanup
gzip -f class1.fasta
gzip -f class1.aligned.fasta
......@@ -88,6 +93,6 @@ rm *.fasta
cp $SCRIPT_ABSOLUTE_PATH .
bzip2 LOG.txt
tar -cjf "../${DOWNLOAD_NAME}.tar.bz2" *
echo "Created archive: $SCRATCH_DIR/$DOWNLOAD_NAME.tar.bz2"
RESULT="$SCRATCH_DIR/${DOWNLOAD_NAME}.$(date +%Y%m%d).tar.bz2"
tar -cjf "$RESULT" *
echo "Created archive: $RESULT"
\ No newline at end of file
......@@ -133,6 +133,7 @@ def run():
assert agreement > 0.9
# Add additional positions
additional_positions = []
if args.differentiate_alleles:
differentiate_alleles = pandas.read_csv(
args.differentiate_alleles).iloc[:,0].values
......
......@@ -62,9 +62,11 @@ if [ "$2" != "continue-incomplete" ]
then
cp $SCRIPT_DIR/generate_hyperparameters.production.py .
cp $SCRIPT_DIR/generate_hyperparameters.py .
cp $SCRIPT_DIR/normalize_allele_names.py .
python generate_hyperparameters.production.py > hyperparameters.production.yaml
python generate_hyperparameters.py hyperparameters.production.yaml no_pretrain > hyperparameters.no_pretrain.yaml
python generate_hyperparameters.py hyperparameters.no_pretrain.yaml single_hidden > hyperparameters.single_hidden_no_pretrain.yaml
python normalize_allele_names.py "$(mhcflurry-downloads path allele_sequences)/class1_pseudosequences.csv" --out allele_sequences.34mer.csv
fi
for kind in single_hidden_no_pretrain no_pretrain 34mer_sequence
......@@ -80,7 +82,7 @@ do
HYPERPARAMETERS=hyperparameters.$kind.yaml
if [ "$kind" == "34mer_sequence" ]
then
ALLELE_SEQUENCES="$(mhcflurry-downloads path allele_sequences)/class1_pseudosequences.csv"
ALLELE_SEQUENCES=allele_sequences.34mer.csv
HYPERPARAMETERS=hyperparameters.production.yaml
fi
......
"""
Normalize MHC allele names
"""
from sys import argv
import os
import pandas
import mhcnames
import argparse
def normalize(s, disallowed=("MIC", "HFE")):
    """
    Normalize an MHC allele name, returning None when it cannot be parsed.

    The full name is tried first. If that fails, trailing ":"-separated
    fields are dropped one at a time and each shorter name is tried in turn.

    Parameters
    ----------
    s : str
        Raw allele name. Non-string values (e.g. None or a float NaN from a
        blank CSV cell) are treated as unparseable.
    disallowed : sequence of str
        Substrings that cause the name to be rejected outright.

    Returns
    -------
    The result of mhcnames.normalize_allele_name for the first candidate
    that parses, or None if the input is not a string, contains a disallowed
    substring, or no candidate parses.
    """
    # Missing values cannot be allele names; report them as unparseable
    # instead of raising TypeError on the substring check below.
    if not isinstance(s, str):
        return None
    if any(item in s for item in disallowed):
        return None
    try:
        return mhcnames.normalize_allele_name(s)
    except Exception:
        # Fall through to the progressively shortened candidates. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        pass
    while s:
        # Drop the last ":"-separated field and retry.
        s = ":".join(s.split(":")[:-1])
        try:
            return mhcnames.normalize_allele_name(s)
        except Exception:
            continue
    return None
# Command-line interface: a positional input CSV and the output CSV path.
parser = argparse.ArgumentParser(usage=__doc__)
parser.add_argument("input_csv")
# --out is mandatory: without it the script would run to the end, write
# nothing, and then fail when reporting the output path.
parser.add_argument("--out", required=True, help="CSV output")
args = parser.parse_args(argv[1:])

df = pandas.read_csv(args.input_csv)
print("Read df with shape", df.shape)

# Replace each allele name with its normalized form; normalize() yields None
# for names it rejects, and those rows are dropped.
df["allele"] = df["allele"].map(normalize)
df = df.loc[~df.allele.isnull()]
print("Done normalizing. After removing unparseable names, shape is", df.shape)

# Several raw names can normalize to the same allele; keep one row per allele.
df = df.drop_duplicates("allele")
print("After dropping duplicates", df.shape)

df.to_csv(args.out, index=False)
print("Wrote", os.path.abspath(args.out))
......@@ -66,7 +66,7 @@ releases:
default: false
- name: data_curated
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_curated.20191011.tar.bz2
url: https://github.com/openvax/mhcflurry/releases/download/1.4.0/data_curated.20191030.tar.bz2
default: true
# Older downloads
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment