Skip to content
Snippets Groups Projects
Commit 15b85d7c authored by Tim O'Donnell
Browse files

fix

parent 5fc5ba6d
No related merge requests found
......@@ -40,7 +40,10 @@ do
CURATE_BY_PMID_ARGS+=$(echo --item $pmid raw/$pmid/* ' ')
done
time python curate_by_pmid.py $CURATE_BY_PMID_ARGS --out curated.by_pmid.csv --debug
time python curate_by_pmid.py $CURATE_BY_PMID_ARGS \
--out nontraining_curated.by_pmid.csv
bzip2 nontraining_curated.by_pmid.csv
rm -rf raw
......
......@@ -126,6 +126,12 @@ def handle_pmid_23481700(filename):
results.append(result_df)
result_df = pandas.concat(results, ignore_index=True)
# Rename samples to avoid a collision with the JY sample in PMID 25576301.
result_df.sample_id = result_df.sample_id.map({
"JY": "JY.2015",
"HHC": "HHC.2015",
})
return result_df
......@@ -666,6 +672,10 @@ def run():
else:
print("No nulls.")
# Each sample should be coming from only one experiment.
assert df.groupby("sample_id").pmid.nunique().max() == 1, (
df.groupby("sample_id").pmid.nunique().sort_values())
df.to_csv(args.out, index=False)
print("Wrote: %s" % os.path.abspath(args.out))
......
......@@ -50,7 +50,7 @@ releases:
default: false
- name: data_curated
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_curated.20190925.tar.bz2
url: https://github.com/openvax/mhcflurry/releases/download/pre-1.4.0/data_curated.20190927.tar.bz2
default: true
# Older downloads
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment