Commit efc71c39 authored by alessiamarcolini's avatar alessiamarcolini
Browse files

Use context managers to open and close files; apply PEP 8 formatting

parent 9d50804a
import numpy as np
from input_output import load_data
import csv
import sys
import numpy as np
from input_output import load_data
def extract_feats(datafile, rankedfile, nfeat, outfile):
# sample names, feature names and table with feature abundances
samples, features, data_ab = load_data(datafile)
......@@ -12,7 +15,7 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
rank = np.loadtxt(rankedfile, delimiter = '\t', skiprows = 1, dtype = str)
# number of features in the list
nf_list = rank.shape
if len(nf_list)>1:
if len(nf_list) > 1:
feats = rank[:, 1]
top_feats = feats[0:nfeat]
else:
......@@ -22,10 +25,10 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
#print top_feats.shape
# extract top features from table with abundances of all features
idx = []
if len(nf_list)==1:
if len(nf_list) == 1:
idx.append(features.index(top_feats))
else:
for i in range(0, nfeat):
for i in range(nfeat):
if top_feats[i] in features:
idx.append(features.index(top_feats[i]))
else:
......@@ -35,14 +38,13 @@ def extract_feats(datafile, rankedfile, nfeat, outfile):
sel_feats=[features[i] for i in idx]
# write new table
outw = open(outfile, 'w')
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
# header
writer.writerow(['Samples']+sel_feats)
for i in range(0, len(samples)):
writer.writerow([samples[i]]+data_ab[i,idx].tolist())
with open(outfile, 'w') as outw:
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
# header
writer.writerow(['Samples']+sel_feats)
for i in range(0, len(samples)):
writer.writerow([samples[i]]+data_ab[i,idx].tolist())
outw.close()
if __name__ == "__main__":
......@@ -60,4 +62,3 @@ if __name__ == "__main__":
outfile = sys.argv[4]
extract_feats(datafile, rankedfile, nfeat, outfile)
......@@ -4,10 +4,11 @@
## Requires Python >= 2.7, mlpy >= 3.5
import numpy as np
import csv
import sys
import numpy as np
__author__ = 'Marco Chierici, Alessandro Zandona'
__date__ = '15 December 2016'
......@@ -16,14 +17,14 @@ __date__ = '15 December 2016'
def extract_feats(datafile, rankedfile, outfile):
#print locals()
# table with feats abundances
data = np.loadtxt(datafile, delimiter = '\t', dtype = str)
data = np.loadtxt(datafile, delimiter='\t', dtype=str)
# feats abundances (no names of samples, no header)
data_ab = data[1:,1:].astype(np.float)
rank = np.loadtxt(rankedfile, delimiter = '\t', skiprows = 1, dtype = str)
rank = np.loadtxt(rankedfile, delimiter='\t', skiprows=1, dtype=str)
# number of features in the list
nf_list = rank.shape
if len(nf_list)>1:
if len(nf_list) > 1:
feats = rank[:, 0]
top_feats = feats #[0:nfeat]
else:
......@@ -34,7 +35,7 @@ def extract_feats(datafile, rankedfile, outfile):
# extract top features from table with abundances of all features
idx = []
nfeat = len(top_feats)
for i in range(0, nfeat):
for i in range(nfeat):
if top_feats[i] in data[0,:].tolist():
idx.append(data[0,:].tolist().index(top_feats[i]))
else:
......@@ -45,12 +46,10 @@ def extract_feats(datafile, rankedfile, outfile):
sel_feats = data[:, idx]
# write new table
outw = open(outfile, 'w')
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
for i in range(0, len(sel_feats[:,0])):
writer.writerow(sel_feats[i,:])
outw.close()
with open(outfile, 'w') as outw:
writer = csv.writer(outw, delimiter='\t', lineterminator='\n')
for i in range(len(sel_feats[:,0])):
writer.writerow(sel_feats[i,:])
if __name__ == "__main__":
......
......@@ -4,18 +4,21 @@
from __future__ import division
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib_venn as pltv
import sys
import csv
import os.path
import argparse
import configparser as ConfigParser
import csv
import os.path
import sys
from distutils.version import StrictVersion
import matplotlib
import matplotlib.pyplot as plt
import matplotlib_venn as pltv
import numpy as np
matplotlib.use('Agg')
parser = argparse.ArgumentParser(description='Find the intersection between feature lists and produce Venn diagrams.')
parser.add_argument('CONFIGFILE1', type=str, help='Training experiment configuration file 1 (with info about number of top discriminant features)')
parser.add_argument('CONFIGFILE2', type=str, help='Training experiment configuration file 2 (with info about number of top discriminant features)')
......@@ -102,48 +105,47 @@ if (configfile3 != 'NO'):
# associate each common feature with its position in each list
#outFile_f1f2=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title2))
#outw=open(outFile_f1f2, 'w')
outw=open(OUTLIST, 'w')
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
writer.writerow(['Feature', 'Position in %s' %title1, 'Postition in %s' %title2])
for i in range(len(list(f1f2))):
# current feature in intersection
interF = list(f1f2)[i]
# position of current feature in first list
idx_list1 = np.where(feats1==interF)[0][0]
# position of current feature in second list
idx_list2 = np.where(feats2==interF)[0][0]
writer.writerow([list(f1f2)[i], idx_list1+1, idx_list2+1])
outw.close()
if (configfile3 != 'NO'):
# associate each common feature with its position in each list
outFile_f1f3=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title3))
outw=open(outFile_f1f3, 'w')
with open(OUTLIST, 'w') as outw:
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
writer.writerow(['Feature', 'Position in %s '%title1, 'Postition in %s ' %title3])
for i in range(len(list(f1f3))):
writer.writerow(['Feature', 'Position in %s' %title1, 'Postition in %s' %title2])
for i in range(len(list(f1f2))):
# current feature in intersection
interF = list(f1f3)[i]
interF = list(f1f2)[i]
# position of current feature in first list
idx_list1 = np.where(feats1==interF)[0][0]
# position of current feature in second list
idx_list3 = np.where(feats3==interF)[0][0]
writer.writerow([list(f1f3)[i], idx_list1+1, idx_list3+1])
outw.close()
idx_list2 = np.where(feats2==interF)[0][0]
writer.writerow([list(f1f2)[i], idx_list1+1, idx_list2+1])
if (configfile3 != 'NO'):
# associate each common feature with its position in each list
outFile_f1f3=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title1,title3))
with open(outFile_f1f3, 'w') as outw:
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
writer.writerow(['Feature', 'Position in %s '%title1, 'Postition in %s ' %title3])
for i in range(len(list(f1f3))):
# current feature in intersection
interF = list(f1f3)[i]
# position of current feature in first list
idx_list1 = np.where(feats1==interF)[0][0]
# position of current feature in second list
idx_list3 = np.where(feats3==interF)[0][0]
writer.writerow([list(f1f3)[i], idx_list1+1, idx_list3+1])
outFile_f2f3=os.path.join(os.path.dirname(OUTFILE),'Intersection_%s_%s.txt' %(title2,title3))
outw=open(outFile_f2f3, 'w')
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
writer.writerow(['Feature', 'Position in %s '%title2, 'Postition in %s ' %title3])
for i in range(len(list(f2f3))):
# current feature in intersection
interF = list(f2f3)[i]
# position of current feature in first list
idx_list2 = np.where(feats2==interF)[0][0]
# position of current feature in second list
idx_list3 = np.where(feats3==interF)[0][0]
writer.writerow([list(f2f3)[i], idx_list2+1, idx_list3+1])
outw.close()
with open(outFile_f2f3, 'w') as outw:
writer = csv.writer(outw, delimiter = '\t', lineterminator = '\n')
writer.writerow(['Feature', 'Position in %s '%title2, 'Postition in %s ' %title3])
for i in range(len(list(f2f3))):
# current feature in intersection
interF = list(f2f3)[i]
# position of current feature in first list
idx_list2 = np.where(feats2==interF)[0][0]
# position of current feature in second list
idx_list3 = np.where(feats3==interF)[0][0]
writer.writerow([list(f2f3)[i], idx_list2+1, idx_list3+1])
# plot Venn diagrams
if (configfile3 != 'NO'):
......
......@@ -4,17 +4,17 @@
## Requires Python >= 2.7, mlpy >= 3.5
import numpy as np
import os.path
from input_output import load_data
import performance as perf
from scaling import *
import argparse
import sys
import configparser as ConfigParser
import os.path
import sys
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from distutils.version import StrictVersion
import performance as perf
from extract_topfeats import extract_feats
from input_output import load_data
parser = argparse.ArgumentParser(description='Run a validation experiment using LibLinear.')
parser.add_argument('CONFIGFILE', type=str, help='Training experiment configuration file')
......@@ -53,7 +53,7 @@ BASEFILE = os.path.splitext(TRFILE)[0]
OUTFILE = os.path.join(OUTDIR, os.path.basename(BASEFILE))
# extract the top-ranked NFEATS features from TRAINING set
TR_TOPFEATS = OUTFILE + '_top%s_tr.txt' % NFEATS
TR_TOPFEATS = OUTFILE + '_top%s_tr.txt' % NFEATS
extract_feats(TRFILE, RANK, NFEATS, TR_TOPFEATS)
# extract the top-ranked NFEATS features from VALIDATION set
TS_TOPFEATS = OUTFILE + '_top%s_ts.txt' % NFEATS
......@@ -83,33 +83,28 @@ if TSLABELSFILE is not None:
# write output files
# save MCC_train and MCC_validation
fout = open(OUTFILE + "_MCC_scores.txt", "w")
fout.write("MCC_train\t%.5f\n" % (perf.KCCC_discrete(y_tr, p_tr)))
fout.write("MCC_validation\t%.5f\n" % (perf.KCCC_discrete(y_ts, p_ts)))
fout.close()
with open(OUTFILE + "_MCC_scores.txt", "w") as fout:
fout.write("MCC_train\t%.5f\n" % (perf.KCCC_discrete(y_tr, p_tr)))
fout.write("MCC_validation\t%.5f\n" % (perf.KCCC_discrete(y_ts, p_ts)))
fout = open(OUTFILE + "_TEST_pred_tr.txt", "w")
for i in range(len(sample_names_tr)):
fout.write("%s\t%i\n" % (sample_names_tr[i], p_tr[i]))
fout.close()
with open(OUTFILE + "_TEST_pred_tr.txt", "w") as fout:
for i in range(len(sample_names_tr)):
fout.write("%s\t%i\n" % (sample_names_tr[i], p_tr[i]))
fout = open(OUTFILE + "_TEST_pred_ts.txt", "w")
for i in range(len(sample_names_ts)):
fout.write("%s\t%i\n" % (sample_names_ts[i], p_ts[i]))
fout.close()
with open(OUTFILE + "_TEST_pred_ts.txt", "w") as fout:
for i in range(len(sample_names_ts)):
fout.write("%s\t%i\n" % (sample_names_ts[i], p_ts[i]))
np.savetxt(OUTFILE + "_TEST_signature.txt",
np.array(var_names_tr).reshape(-1,1),
fmt='%s', delimiter='\t')
fout = open(OUTFILE + "_TEST_prob_tr.txt", "w")
fout.write("SAMPLE\tCLASS 0\tCLASS 1\n")
for i in range(len(sample_names_tr)):
fout.write("%s\t%f\t%f\n" % (sample_names_tr[i], prob_tr[i,0], prob_tr[i,1]))
fout.close()
fout = open(OUTFILE + "_TEST_prob_ts.txt", "w")
fout.write("SAMPLE\tCLASS 0\tCLASS 1\n")
for i in range(len(sample_names_ts)):
fout.write("%s\t%f\t%f\n" % (sample_names_ts[i], prob_ts[i,0], prob_ts[i,1]))
fout.close()
with open(OUTFILE + "_TEST_prob_tr.txt", "w") as fout:
fout.write("SAMPLE\tCLASS 0\tCLASS 1\n")
for i in range(len(sample_names_tr)):
fout.write("%s\t%f\t%f\n" % (sample_names_tr[i], prob_tr[i,0], prob_tr[i,1]))
with open(OUTFILE + "_TEST_prob_ts.txt", "w") as fout:
fout.write("SAMPLE\tCLASS 0\tCLASS 1\n")
for i in range(len(sample_names_ts)):
fout.write("%s\t%f\t%f\n" % (sample_names_ts[i], prob_ts[i,0], prob_ts[i,1]))
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment