Skip to content
Snippets Groups Projects
Commit 15b85d7c authored by Tim O'Donnell
Browse files

fix

parent 5fc5ba6d
No related branches found
No related tags found
No related merge requests found
......@@ -40,7 +40,10 @@ do
CURATE_BY_PMID_ARGS+=$(echo --item $pmid raw/$pmid/* ' ')
done
time python curate_by_pmid.py $CURATE_BY_PMID_ARGS --out curated.by_pmid.csv --debug
time python curate_by_pmid.py $CURATE_BY_PMID_ARGS \
--out nontraining_curated.by_pmid.csv
bzip2 nontraining_curated.by_pmid.csv
rm -rf raw
......
......@@ -126,6 +126,12 @@ def handle_pmid_23481700(filename):
results.append(result_df)
result_df = pandas.concat(results, ignore_index=True)
# Rename samples to avoid a collision with the JY sample in PMID 25576301.
result_df.sample_id = result_df.sample_id.map({
"JY": "JY.2015",
"HHC": "HHC.2015",
})
return result_df
......@@ -666,6 +672,10 @@ def run():
else:
print("No nulls.")
# Each sample should be coming from only one experiment.
assert df.groupby("sample_id").pmid.nunique().max() == 1, (
df.groupby("sample_id").pmid.nunique().sort_values())
df.to_csv(args.out, index=False)
print("Wrote: %s" % os.path.abspath(args.out))
......
......@@ -50,7 +50,7 @@ releases:
default: false
- name: data_curated
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_curated.20190925.tar.bz2
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_curated.20190927.tar.bz2
default: true
# Older downloads
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment