Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
mhc_rank
Manage
Activity
Members
Labels
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Patrick Skillman-Lawrence
mhc_rank
Commits
cf1c077e
Commit
cf1c077e
authored
9 years ago
by
Alex Rubinsteyn
Browse files
Options
Downloads
Patches
Plain Diff
print dataset size for each allele
parent
92abfb6e
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
mhcflurry/paths.py
+4
-1
4 additions, 1 deletion
mhcflurry/paths.py
scripts/create-combined-class1-dataset.py
+2
-5
2 additions, 5 deletions
scripts/create-combined-class1-dataset.py
scripts/print-class1-alleles.py
+30
-6
30 additions, 6 deletions
scripts/print-class1-alleles.py
with
36 additions
and
12 deletions
mhcflurry/paths.py
+
4
−
1
View file @
cf1c077e
...
@@ -17,4 +17,7 @@ from appdirs import user_data_dir
...
@@ -17,4 +17,7 @@ from appdirs import user_data_dir
BASE_DIRECTORY
=
user_data_dir
(
"
mhcflurry
"
,
version
=
"
0.1
"
)
BASE_DIRECTORY
=
user_data_dir
(
"
mhcflurry
"
,
version
=
"
0.1
"
)
CLASS1_DATA_DIRECTORY
=
join
(
BASE_DIRECTORY
,
"
class1_data
"
)
CLASS1_DATA_DIRECTORY
=
join
(
BASE_DIRECTORY
,
"
class1_data
"
)
CLASS1_MODEL_DIRECTORY
=
join
(
BASE_DIRECTORY
,
"
class1_models
"
)
CLASS1_MODEL_DIRECTORY
=
join
(
BASE_DIRECTORY
,
"
class1_models
"
)
\ No newline at end of file
CLASS1_DATA_CSV_FILENAME
=
"
combined_human_class1_dataset.csv
"
CLASS1_DATA_CSV_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
CLASS1_DATA_CSV_FILENAME
)
This diff is collapsed.
Click to expand it.
scripts/create-combined-class1-dataset.py
+
2
−
5
View file @
cf1c077e
...
@@ -19,7 +19,7 @@ import argparse
...
@@ -19,7 +19,7 @@ import argparse
import
pandas
as
pd
import
pandas
as
pd
from
mhcflurry.paths
import
CLASS1_DATA_DIRECTORY
from
mhcflurry.paths
import
CLASS1_DATA_DIRECTORY
,
CLASS1_DATA_CSV_PATH
IEDB_PICKLE_FILENAME
=
"
iedb_human_class1_assay_datasets.pickle
"
IEDB_PICKLE_FILENAME
=
"
iedb_human_class1_assay_datasets.pickle
"
IEDB_PICKLE_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
IEDB_PICKLE_FILENAME
)
IEDB_PICKLE_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
IEDB_PICKLE_FILENAME
)
...
@@ -27,9 +27,6 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
...
@@ -27,9 +27,6 @@ IEDB_PICKLE_PATH = join(CLASS1_DATA_DIRECTORY, IEDB_PICKLE_FILENAME)
PETERS_CSV_FILENAME
=
"
bdata.20130222.mhci.public.1.txt
"
PETERS_CSV_FILENAME
=
"
bdata.20130222.mhci.public.1.txt
"
PETERS_CSV_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
PETERS_CSV_FILENAME
)
PETERS_CSV_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
PETERS_CSV_FILENAME
)
OUTPUT_CSV_FILENAME
=
"
combined_human_class1_dataset.csv
"
OUTPUT_CSV_PATH
=
join
(
CLASS1_DATA_DIRECTORY
,
OUTPUT_CSV_FILENAME
)
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"
--ic50-fraction-tolerance
"
,
parser
.
add_argument
(
"
--ic50-fraction-tolerance
"
,
...
@@ -59,7 +56,7 @@ parser.add_argument("--netmhcpan-csv-path",
...
@@ -59,7 +56,7 @@ parser.add_argument("--netmhcpan-csv-path",
help
=
"
Path to CSV with NetMHCpan dataset from 2013 Peters paper
"
)
help
=
"
Path to CSV with NetMHCpan dataset from 2013 Peters paper
"
)
parser
.
add_argument
(
"
--output-csv-path
"
,
parser
.
add_argument
(
"
--output-csv-path
"
,
default
=
OUTPUT
_CSV_PATH
,
default
=
CLASS1_DATA
_CSV_PATH
,
help
=
"
Path to CSV of combined assay results
"
)
help
=
"
Path to CSV of combined assay results
"
)
parser
.
add_argument
(
"
--extra-dataset-csv-path
"
,
parser
.
add_argument
(
"
--extra-dataset-csv-path
"
,
...
...
This diff is collapsed.
Click to expand it.
scripts/print-class1-alleles.py
+
30
−
6
View file @
cf1c077e
...
@@ -22,7 +22,9 @@ trained models are available
...
@@ -22,7 +22,9 @@ trained models are available
import
argparse
import
argparse
import
os
import
os
from
mhcflurry.paths
import
CLASS1_MODEL_DIRECTORY
import
pandas
as
pd
from
mhcflurry.paths
import
CLASS1_MODEL_DIRECTORY
,
CLASS1_DATA_CSV_PATH
parser
=
argparse
.
ArgumentParser
()
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -30,17 +32,39 @@ parser.add_argument(
...
@@ -30,17 +32,39 @@ parser.add_argument(
default
=
False
,
default
=
False
,
action
=
"
store_true
"
)
action
=
"
store_true
"
)
parser
.
add_argument
(
"
--with-dataset-size
"
,
default
=
False
,
action
=
"
store_true
"
)
parser
.
add_argument
(
"
--all
"
,
default
=
False
,
action
=
"
store_true
"
,
help
=
"
Include serotypes (like
'
A2
'
) which include multiple 4-digit types
"
)
if
__name__
==
"
__main__
"
:
if
__name__
==
"
__main__
"
:
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
if
args
.
with_dataset_size
:
df
=
pd
.
read_csv
(
CLASS1_DATA_CSV_PATH
)
allele_sizes
=
{
allele
:
len
(
group
)
for
(
allele
,
group
)
in
df
.
groupby
(
"
mhc
"
)
}
else
:
allele_sizes
=
None
for
filename
in
os
.
listdir
(
CLASS1_MODEL_DIRECTORY
):
for
filename
in
os
.
listdir
(
CLASS1_MODEL_DIRECTORY
):
allele
=
filename
.
replace
(
"
.hdf
"
,
""
)
allele
=
filename
.
replace
(
"
.hdf
"
,
""
)
if
len
(
allele
)
<
5
:
if
len
(
allele
)
>=
5
:
allele
=
"
HLA-%s*%s:%s
"
%
(
allele
[
0
],
allele
[
1
:
3
],
allele
[
3
:])
elif
args
.
all
:
allele
=
"
HLA-%s
"
%
allele
else
:
# skipping serotype names like A2 or B7
# skipping serotype names like A2 or B7
continue
continue
allele
=
"
HLA-%s*%s:%s
"
%
(
allele
[
0
],
allele
[
1
:
3
],
allele
[
3
:])
line
=
allele
if
args
.
with_peptide_lengths
:
if
args
.
with_peptide_lengths
:
print
(
"
%s
\t
8,9,10,11,12
"
%
allele
)
line
+=
"
\t
8,9,10,11,12
"
else
:
if
args
.
with_dataset_size
:
print
(
allele
)
line
+=
"
\t
%d
"
%
allele_sizes
[
allele
]
\ No newline at end of file
print
(
line
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment