Commit 8c8ee19b authored by Alex Rubinsteyn

moved fixed dataset paths into own module

parent a2857ed2
+from os.path import join
+from mhcflurry.paths import CLASS1_DATA_DIRECTORY
+PETERS2009_CSV_FILENAME = "bdata.2009.mhci.public.1.txt"
+PETERS2009_CSV_PATH = join(CLASS1_DATA_DIRECTORY, PETERS2009_CSV_FILENAME)
+PETERS2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
+PETERS2013_CSV_PATH = join(CLASS1_DATA_DIRECTORY, PETERS2013_CSV_FILENAME)
+BLIND_2013_CSV_FILENAME = "bdata.2013.mhci.public.blind.1.txt"
+BLIND_2013_CSV_PATH = join(CLASS1_DATA_DIRECTORY, BLIND_2013_CSV_FILENAME)
+COMBINED_CSV_FILENAME = "combined_human_class1_dataset.csv"
+COMBINED_CSV_PATH = join(CLASS1_DATA_DIRECTORY, COMBINED_CSV_FILENAME)
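The added lines above are the new dataset_paths module this commit introduces. The constants are meant to be imported anywhere a script needs one of the fixed benchmark files; the experiment script changed below imports mhcflurry.data_helpers.load_data for actual loading. As a minimal usage sketch only (the pandas-based read and the tab-separated layout assumed here are illustrative, not taken from this commit):

# Hypothetical usage sketch of the new dataset_paths module.
# Assumes the bdata.* benchmark files are tab-separated; adjust `sep` if not.
import pandas as pd

from dataset_paths import PETERS2009_CSV_PATH, COMBINED_CSV_PATH

peters_2009 = pd.read_csv(PETERS2009_CSV_PATH, sep="\t")
combined = pd.read_csv(COMBINED_CSV_PATH)  # plain CSV, per its .csv extension

print(len(peters_2009), "rows in the Peters 2009 benchmark")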
@@ -20,7 +20,6 @@ from __future__ import (
    absolute_import,
    unicode_literals
)
-from os.path import join
import argparse
from time import time
@@ -29,9 +28,7 @@ import numpy as np
import pandas as pd
from mhcflurry.data_helpers import load_data
-from mhcflurry.paths import (
-    CLASS1_DATA_DIRECTORY
-)
from model_configs import (
    generate_all_model_configs,
@@ -53,19 +50,7 @@ from model_selection_helpers import (
from summarize_model_results import hyperparameter_performance
from arg_parsing import parse_int_list, parse_float_list, parse_string_list
-PETERS2009_CSV_FILENAME = "bdata.2009.mhci.public.1.txt"
-PETERS2009_CSV_PATH = join(CLASS1_DATA_DIRECTORY, PETERS2009_CSV_FILENAME)
-PETERS2013_CSV_FILENAME = "bdata.20130222.mhci.public.1.txt"
-PETERS2013_CSV_PATH = join(CLASS1_DATA_DIRECTORY, PETERS2013_CSV_FILENAME)
-BLIND_2013_CSV_FILENAME = "bdata.2013.mhci.public.blind.1.txt"
-BLIND_2013_CSV_PATH = join(CLASS1_DATA_DIRECTORY, BLIND_2013_CSV_FILENAME)
-COMBINED_CSV_FILENAME = "combined_human_class1_dataset.csv"
-COMBINED_CSV_PATH = join(CLASS1_DATA_DIRECTORY, COMBINED_CSV_FILENAME)
+from dataset_paths import PETERS2009_CSV_PATH
parser = argparse.ArgumentParser()
@@ -160,18 +145,6 @@ parser.add_argument(
    type=parse_string_list,
    help="Comma separated list of optimization methods")
-parser.add_argument(
-    "--ensemble-size",
-    default=[0],
-    type=parse_int_list,
-    help="Number of classifiers in ensemble, default=0 (don't use an ensemble)")
-parser.add_argument(
-    "--ensemble-fraction-dataset",
-    default=[1.0],
-    type=parse_float_list,
-    help="Size of ensemble training sets (sampled with replacement)")
def evaluate_model_configs(configs, results_filename, train_fn):
    all_dataframes = []
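The removed --ensemble-size and --ensemble-fraction-dataset flags relied on the same comma-separated list parsers (parse_int_list, parse_float_list) that the surviving options still import from arg_parsing. Their implementations are not part of this diff; the following is only a plausible sketch, assuming they split on commas and coerce each element:

# Plausible sketch of the arg_parsing helpers used as argparse `type=` callables.
# The real implementations in arg_parsing may differ.
def parse_string_list(s):
    return [part.strip() for part in s.split(",") if part.strip()]

def parse_int_list(s):
    return [int(part) for part in parse_string_list(s)]

def parse_float_list(s):
    return [float(part) for part in parse_string_list(s)]

# Example: a value of "5,10,25" passed through parse_int_list yields [5, 10, 25].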
@@ -37,10 +37,10 @@ def f1_score(true_label, label_pred):
    tp = (true_label & label_pred).sum()
    fp = ((~true_label) & label_pred).sum()
    fn = (true_label & (~label_pred)).sum()
-    sensitivity = (tp / float(tp + fn)) if (tp + fn) > 0 else 0.0
+    recall = (tp / float(tp + fn)) if (tp + fn) > 0 else 0.0
    precision = (tp / float(tp + fp)) if (tp + fp) > 0 else 0.0
-    if (precision + sensitivity) > 0:
-        return (2 * precision * sensitivity) / (precision + sensitivity)
+    if (precision + recall) > 0:
+        return (2 * precision * recall) / (precision + recall)
    else:
        return 0.0
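The change above is a pure rename from sensitivity to recall; the computed value is unchanged, since recall and sensitivity are the same quantity, tp / (tp + fn). Below is a self-contained version assembled from the lines visible in this hunk (the real function may have additional lines outside the hunk), followed by an illustrative check on boolean arrays:

import numpy as np

def f1_score(true_label, label_pred):
    # true_label and label_pred are boolean arrays of the same shape.
    tp = (true_label & label_pred).sum()
    fp = ((~true_label) & label_pred).sum()
    fn = (true_label & (~label_pred)).sum()
    recall = (tp / float(tp + fn)) if (tp + fn) > 0 else 0.0
    precision = (tp / float(tp + fp)) if (tp + fp) > 0 else 0.0
    if (precision + recall) > 0:
        return (2 * precision * recall) / (precision + recall)
    else:
        return 0.0

# Illustrative values only: 2 true positives, 1 false positive, 1 false negative
# gives precision = 2/3, recall = 2/3, and therefore F1 = 2/3.
y_true = np.array([True, True, True, False, False])
y_pred = np.array([True, True, False, True, False])
assert abs(f1_score(y_true, y_pred) - 2.0 / 3.0) < 1e-9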