Skip to content
Snippets Groups Projects
Commit 2bdae397 authored by Tim O'Donnell
Browse files

update

parent dd582f48
No related branches found
No related tags found
No related merge requests found
...@@ -5,6 +5,7 @@ optionally including eluted peptides identified by mass-spec. ...@@ -5,6 +5,7 @@ optionally including eluted peptides identified by mass-spec.
import sys import sys
import argparse import argparse
import os import os
import collections
import pandas import pandas
...@@ -122,28 +123,58 @@ def handle_pmid_25576301(filename): ...@@ -122,28 +123,58 @@ def handle_pmid_25576301(filename):
assert peptides[0] == "AAAAAAAQSVY" assert peptides[0] == "AAAAAAAQSVY"
assert peptides[-1] == "YYYNGKAVY" assert peptides[-1] == "YYYNGKAVY"
# TODO TODO column_to_sample = {}
import ipdb ; ipdb.set_trace() for s in [c for c in df if c.startswith("Intensity ")]:
assert s[-2] == "-"
column_to_sample[s] = s.replace("Intensity ", "")[:-2].strip()
# THIS IS ALL JUNK: intensity_columns = list(column_to_sample)
result = pandas.DataFrame({
"peptide": peptides, rows = []
}) for _, row in df.iterrows():
result["sample_id"] = "24616531" x1 = row[intensity_columns]
result["sample_type"] = "B-lymphoblastoid" x2 = x1[x1 > 0].index.map(column_to_sample).value_counts()
result["cell_line"] = "GR" x3 = x2[x2 >= 2] # require at least two replicates for each peptide
for sample in x3.index:
rows.append((row.Sequence, sample))
result = pandas.DataFrame(rows, columns=["peptide", "sample_id"])
result["cell_line"] = ""
result["pulldown_antibody"] = "W6/32" result["pulldown_antibody"] = "W6/32"
# Note: this publication lists hla as "HLA-A*01,-03, B*07,-27, and -C*02,-07" allele_map = {
# we are guessing the exact 4 digit alleles based on this. 'Fib': "HLA-A*03:01 HLA-A*23:01 HLA-B*08:01 HLA-B*15:18 HLA-C*07:02 HLA-C*07:04",
result["hla"] = "HLA-A*01:01 HLA-A*03:01 HLA-B*07:02 HLA-B*27:05 HLA-C*02:02 HLA-C*07:01" 'HCC1937': "HLA-A*23:01 HLA-A*24:02 HLA-B*07:02 HLA-B*40:01 HLA-C*03:04 HLA-C*07:02",
'SupB15WT': None, # four digit alleles unknown, will drop sample
'SupB15RT': None,
'HCT116': "HLA-A*01:01 HLA-A*02:01 HLA-B*45:01 HLA-B*18:01 HLA-C*05:01 HLA-C*07:01",
# Homozygous at HLA-A:
'HCC1143': "HLA-A*31:01 HLA-A*31:01 HLA-B*35:08 HLA-B*37:01 HLA-C*04:01 HLA-C*06:02",
# Homozygous everywhere:
'JY': "HLA-A*02:01 HLA-A*02:01 HLA-B*07:02 HLA-B*07:02 HLA-C*07:02 HLA-C*07:02",
}
sample_type = {
'Fib': "fibroblast",
'HCC1937': "basal like breast cancer",
'SupB15WT': None,
'SupB15RT': None,
'HCT116': "colon carcinoma",
'HCC1143': "basal like breast cancer",
'JY': "B-cell",
}
result["hla"] = result.sample_id.map(allele_map)
print("Entries before dropping samples with unknown alleles", len(result))
result = result.loc[~result.hla.isnull()]
print("Entries after dropping samples with unknown alleles", len(result))
result["sample_type"] = result.sample_id.map(sample_type)
print(result.head(3))
return result return result
# Hack to add all functions with names like handle_pmid_XXXX to HANDLERS dict. # Hack to add all functions with names like handle_pmid_XXXX to HANDLERS dict.
for (key, value) in list(locals().items()): for (key, value) in list(locals().items()):
if key.startswith("handle_pmid_"): if key.startswith("handle_pmid_"):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment