Skip to content
Snippets Groups Projects
Commit 2bdae397 authored by Tim O'Donnell
Browse files

update

parent dd582f48
No related branches found
No related tags found
No related merge requests found
......@@ -5,6 +5,7 @@ optionally including eluted peptides identified by mass-spec.
import sys
import argparse
import os
import collections
import pandas
......@@ -122,28 +123,58 @@ def handle_pmid_25576301(filename):
assert peptides[0] == "AAAAAAAQSVY"
assert peptides[-1] == "YYYNGKAVY"
# TODO TODO
import ipdb ; ipdb.set_trace()
column_to_sample = {}
for s in [c for c in df if c.startswith("Intensity ")]:
assert s[-2] == "-"
column_to_sample[s] = s.replace("Intensity ", "")[:-2].strip()
# THIS IS ALL JUNK:
result = pandas.DataFrame({
"peptide": peptides,
})
result["sample_id"] = "24616531"
result["sample_type"] = "B-lymphoblastoid"
result["cell_line"] = "GR"
intensity_columns = list(column_to_sample)
rows = []
for _, row in df.iterrows():
x1 = row[intensity_columns]
x2 = x1[x1 > 0].index.map(column_to_sample).value_counts()
x3 = x2[x2 >= 2] # require at least two replicates for each peptide
for sample in x3.index:
rows.append((row.Sequence, sample))
result = pandas.DataFrame(rows, columns=["peptide", "sample_id"])
result["cell_line"] = ""
result["pulldown_antibody"] = "W6/32"
# Note: this publication lists hla as "HLA-A*01,-03, B*07,-27, and -C*02,-07"
# we are guessing the exact 4 digit alleles based on this.
result["hla"] = "HLA-A*01:01 HLA-A*03:01 HLA-B*07:02 HLA-B*27:05 HLA-C*02:02 HLA-C*07:01"
allele_map = {
'Fib': "HLA-A*03:01 HLA-A*23:01 HLA-B*08:01 HLA-B*15:18 HLA-C*07:02 HLA-C*07:04",
'HCC1937': "HLA-A*23:01 HLA-A*24:02 HLA-B*07:02 HLA-B*40:01 HLA-C*03:04 HLA-C*07:02",
'SupB15WT': None, # four digit alleles unknown, will drop sample
'SupB15RT': None,
'HCT116': "HLA-A*01:01 HLA-A*02:01 HLA-B*45:01 HLA-B*18:01 HLA-C*05:01 HLA-C*07:01",
# Homozygous at HLA-A:
'HCC1143': "HLA-A*31:01 HLA-A*31:01 HLA-B*35:08 HLA-B*37:01 HLA-C*04:01 HLA-C*06:02",
# Homozygous everywhere:
'JY': "HLA-A*02:01 HLA-A*02:01 HLA-B*07:02 HLA-B*07:02 HLA-C*07:02 HLA-C*07:02",
}
sample_type = {
'Fib': "fibroblast",
'HCC1937': "basal like breast cancer",
'SupB15WT': None,
'SupB15RT': None,
'HCT116': "colon carcinoma",
'HCC1143': "basal like breast cancer",
'JY': "B-cell",
}
result["hla"] = result.sample_id.map(allele_map)
print("Entries before dropping samples with unknown alleles", len(result))
result = result.loc[~result.hla.isnull()]
print("Entries after dropping samples with unknown alleles", len(result))
result["sample_type"] = result.sample_id.map(sample_type)
print(result.head(3))
return result
# Hack to add all functions with names like handle_pmid_XXXX to HANDLERS dict.
for (key, value) in list(locals().items()):
if key.startswith("handle_pmid_"):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment