Commit adf3a331 authored by Nicole Bussola
Browse files

fix conflict

parents 6f4e2688 7507f448
......@@ -4,15 +4,18 @@
# output folder
OUT=results_breast
DATA_FOLDER=data/TCGA_data/Breast/INF
LAYER1=gene
LAYER2=cnv
# prepare output tree
# go!
make all \
OUTBASE=${OUT} \
DATA1=${DATA_FOLDER}/gene_tr.txt \
DATA2=${DATA_FOLDER}/prot_tr.txt \
FILE=${DATA_FOLDER}/gene_prot_tr.txt \
DATA1=${DATA_FOLDER}/${LAYER1}_tr.txt \
DATA2=${DATA_FOLDER}/${LAYER2}_tr.txt \
FILE=${DATA_FOLDER}/${LAYER1}_${LAYER2}_tr.txt \
LABEL=${DATA_FOLDER}/labels_ER_tr.txt \
ENDPOINT=breast_ER_gene_prot
ENDPOINT=breast_ER_${LAYER1}_${LAYER2}
......@@ -4,12 +4,11 @@
## Requires Python >= 2.7, mlpy >= 3.5
from __future__ import division
import numpy as np
import csv
import os.path
from scaling import norm_l2
import mlpy
from mlpy import bootstrap_ci, borda_count, canberra_stability
from input_output import load_data
import performance as perf
import sys
......@@ -107,14 +106,11 @@ if RANK_METHOD == "rankList":
# build FSTEPS according to dataset size
nfeat = x.shape[1]
#ord = np.int(np.log10(nfeat))
#fs = np.empty(0, dtype=np.int)
#for p in range(ord+1):
# fs = np.concatenate( (fs, np.dot(10**p, np.arange(10))) )
#fs = np.unique(fs)[1:]
# cap FSTEPS at 10000 features, if applicable
fs = np.array([5, 10, 25, 50, 75, 100, 500, 1000, 5000, 10000], dtype=np.int)
FSTEPS = fs[ fs <= 10000 ].tolist() if nfeat>10000 else fs[ fs < nfeat ].tolist() + [nfeat]
feature_ranges = [5, 10, 25, 50, 75, 100]
FSTEPS = list()
for percentage in feature_ranges:
k = np.ceil((nfeat * percentage) / 100).astype(np.int)
FSTEPS.append(k)
# prepare output files
metricsf = open(OUTFILE + "_metrics.txt", 'w')
......@@ -236,31 +232,31 @@ ADOR_APPROX = (ASENS / (1 - ASPEC)) / ((1 - ASENS) / ASPEC)
# confidence intervals
NPVCI = []
for i in range(NPV.shape[1]):
NPVCI.append(mlpy.bootstrap_ci(NPV[:, i]))
NPVCI.append(bootstrap_ci(NPV[:, i]))
PPVCI = []
for i in range(PPV.shape[1]):
PPVCI.append(mlpy.bootstrap_ci(PPV[:, i]))
PPVCI.append(bootstrap_ci(PPV[:, i]))
SENSCI = []
for i in range(SENS.shape[1]):
SENSCI.append(mlpy.bootstrap_ci(SENS[:, i]))
SENSCI.append(bootstrap_ci(SENS[:, i]))
SPECCI = []
for i in range(SPEC.shape[1]):
SPECCI.append(mlpy.bootstrap_ci(SPEC[:, i]))
SPECCI.append(bootstrap_ci(SPEC[:, i]))
MCCCI = []
for i in range(MCC.shape[1]):
MCCCI.append(mlpy.bootstrap_ci(MCC[:, i]))
MCCCI.append(bootstrap_ci(MCC[:, i]))
AUCCI = []
for i in range(AUC.shape[1]):
AUCCI.append(mlpy.bootstrap_ci(AUC[:, i]))
AUCCI.append(bootstrap_ci(AUC[:, i]))
DORCI = []
for i in range(DOR.shape[1]):
DORCI.append(mlpy.bootstrap_ci(DOR[:, i]))
DORCI.append(bootstrap_ci(DOR[:, i]))
ACCCI = []
for i in range(ACC.shape[1]):
ACCCI.append(mlpy.bootstrap_ci(ACC[:, i]))
ACCCI.append(bootstrap_ci(ACC[:, i]))
# Borda list
BORDA_ID, _, BORDA_POS = mlpy.borda_count(RANKING)
BORDA_ID, _, BORDA_POS = borda_count(RANKING)
# optimal number of features (yielding max MCC)
opt_feats = FSTEPS[np.argmax(AMCC)]
......@@ -268,7 +264,7 @@ opt_feats = FSTEPS[np.argmax(AMCC)]
STABILITY = []
PR = np.argsort( RANKING )
for ss in FSTEPS:
STABILITY.append( mlpy.canberra_stability(PR, ss) )
STABILITY.append( canberra_stability(PR, ss) )
metrics_w.writerow(["FS_WITH_BEST_MCC", opt_feats])
metrics_w.writerow(["STEP",
......
......@@ -84,7 +84,6 @@ if TSLABELSFILE is not None:
# write output files
# save MCC_train and MCC_validation
fout = open(OUTFILE + "_MCC_scores.txt", "w")
print('ciao')
fout.write("MCC_train\t%.5f\n" % (perf.KCCC_discrete(y_tr, p_tr)))
fout.write("MCC_validation\t%.5f\n" % (perf.KCCC_discrete(y_ts, p_ts)))
fout.close()
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment