Skip to content
Snippets Groups Projects
Commit 7ed8c54a authored by Tim O'Donnell
Browse files

update

parent 790ebc17
No related merge requests found
......@@ -5,6 +5,7 @@ optionally including eluted peptides identified by mass-spec.
import sys
import argparse
import os
import collections
import pandas
......@@ -122,28 +123,58 @@ def handle_pmid_25576301(filename):
assert peptides[0] == "AAAAAAAQSVY"
assert peptides[-1] == "YYYNGKAVY"
# TODO TODO
import ipdb ; ipdb.set_trace()
column_to_sample = {}
for s in [c for c in df if c.startswith("Intensity ")]:
assert s[-2] == "-"
column_to_sample[s] = s.replace("Intensity ", "")[:-2].strip()
# THIS IS ALL JUNK:
result = pandas.DataFrame({
"peptide": peptides,
})
result["sample_id"] = "24616531"
result["sample_type"] = "B-lymphoblastoid"
result["cell_line"] = "GR"
intensity_columns = list(column_to_sample)
rows = []
for _, row in df.iterrows():
x1 = row[intensity_columns]
x2 = x1[x1 > 0].index.map(column_to_sample).value_counts()
x3 = x2[x2 >= 2] # require at least two replicates for each peptide
for sample in x3.index:
rows.append((row.Sequence, sample))
result = pandas.DataFrame(rows, columns=["peptide", "sample_id"])
result["cell_line"] = ""
result["pulldown_antibody"] = "W6/32"
# Note: this publication lists hla as "HLA-A*01,-03, B*07,-27, and -C*02,-07"
# we are guessing the exact 4 digit alleles based on this.
result["hla"] = "HLA-A*01:01 HLA-A*03:01 HLA-B*07:02 HLA-B*27:05 HLA-C*02:02 HLA-C*07:01"
allele_map = {
'Fib': "HLA-A*03:01 HLA-A*23:01 HLA-B*08:01 HLA-B*15:18 HLA-C*07:02 HLA-C*07:04",
'HCC1937': "HLA-A*23:01 HLA-A*24:02 HLA-B*07:02 HLA-B*40:01 HLA-C*03:04 HLA-C*07:02",
'SupB15WT': None, # four digit alleles unknown, will drop sample
'SupB15RT': None,
'HCT116': "HLA-A*01:01 HLA-A*02:01 HLA-B*45:01 HLA-B*18:01 HLA-C*05:01 HLA-C*07:01",
# Homozygous at HLA-A:
'HCC1143': "HLA-A*31:01 HLA-A*31:01 HLA-B*35:08 HLA-B*37:01 HLA-C*04:01 HLA-C*06:02",
# Homozygous everywhere:
'JY': "HLA-A*02:01 HLA-A*02:01 HLA-B*07:02 HLA-B*07:02 HLA-C*07:02 HLA-C*07:02",
}
sample_type = {
'Fib': "fibroblast",
'HCC1937': "basal like breast cancer",
'SupB15WT': None,
'SupB15RT': None,
'HCT116': "colon carcinoma",
'HCC1143': "basal like breast cancer",
'JY': "B-cell",
}
result["hla"] = result.sample_id.map(allele_map)
print("Entries before dropping samples with unknown alleles", len(result))
result = result.loc[~result.hla.isnull()]
print("Entries after dropping samples with unknown alleles", len(result))
result["sample_type"] = result.sample_id.map(sample_type)
print(result.head(3))
return result
# Hack to add all functions with names like handle_pmid_XXXX to HANDLERS dict.
for (key, value) in list(locals().items()):
if key.startswith("handle_pmid_"):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment