# column_clean.py

import sys

# Usage: python column_clean.py <input_file>
#
# Copies a comma-separated input file to a new file, replacing its first line
# (the header row) with a human-readable header. Every other line is written
# back with the same columns, stripped of surrounding whitespace.

if len(sys.argv) < 2:
    # Fail with a readable message instead of a bare IndexError.
    sys.exit("usage: column_clean.py <input_file>")

file_name = sys.argv[1]

# Files whose name contains "biofilm" get the shorter biofilm header.
biofilm = "biofilm" in file_name

# Gross header : MAY.Strain..	Species	Soll.Clade	Isolation.Site	MTL.Genotype	Media	Temperature..C.	MJD.Phenotype.Score	MJD.Score.St..Dev.	RJF.Phenotype.Score	RJF.Score.St..Dev.	Total.Average.Phenotype.Score	Total.Phenotype.Score.St..Dev.	Normalized.Scores
# Fix: the previous non-biofilm header carried a stray tab before
# "MJD Score St. Dev."; every field is now separated by ", " only.
new_header = (
    "May Strain, Species, Soll Clade, Isolation Site, MTL Genotype, Media, "
    "Temperature (\N{DEGREE SIGN}C), MJD Phenotype Score, MJD Score St. Dev., "
    "RJF Phenotype Score, RJF Score St. Dev., Total Average Phenotype Score, "
    "Total Phenotype Score St. Dev., Normalized Scores"
)
biofilm_header = (
    "May Strain, Species, Soll Clade, Isolation Site, Media, "
    "Temperature (\N{DEGREE SIGN}C), Total Average Phenotype Score, "
    "Total Phenotype Score St. Dev., Normalized Scores"
)

# nc = normalized clean. The first five characters of the input name are
# dropped -- presumably a prefix such as "norm_"; TODO confirm. Note this is
# applied to the whole argument, so it only behaves as intended when the
# script is given a bare file name rather than a path.
output_name = "nc_" + file_name[5:]

# The output encoding is pinned to UTF-8 because the headers contain a
# non-ASCII degree sign, which a locale-default encoding may fail to encode.
# `with` guarantees both files are closed even if a read or write fails.
with open(file_name, "r") as source, \
        open(output_name, "w", encoding="utf-8") as new_file:

    # The first input line is the original header: discard it and emit the
    # replacement. An empty input file produces an empty output file.
    if next(source, None) is not None:
        print(biofilm_header if biofilm else new_header, file=new_file)

    for line in source:
        columns = line.split(",")

        # All columns are currently kept for both file types. To remove the
        # per-person scoring from non-biofilm files, use instead:
        #     columns = columns[1:7] + columns[11:]

        print(",".join(columns).strip(), file=new_file)