Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
MPBA
phylogenetic-cnn
Commits
9163c31d
Commit
9163c31d
authored
Apr 28, 2017
by
Diego Fioravanti
Browse files
Initial commit with the original files from the thesis plus datasets and requirement.txt
parents
Changes
90
Expand all
Hide whitespace changes
Inline
Side-by-side
.gitignore
0 → 100644
View file @
9163c31d
# Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# dotenv
.env
# virtualenv
.venv
venv/
ENV/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# R
# History files
.Rhistory
.Rapp.history
# Session Data files
.RData
# Example code in package build process
*-Ex.R
# Output files from R CMD build
/*.tar.gz
# Output files from R CMD check
/*.Rcheck/
# RStudio files
.Rproj.user/
# produced vignettes
vignettes/*.html
vignettes/*.pdf
# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth
# knitr and R markdown default cache directories
/*_cache/
/cache/
# Temporary files created by R markdown
*.utf8.md
*.knit.md
# Various
.idea/
R_scripts/get_coordinates/torgerson_mds.R
0 → 100644
View file @
9163c31d
library
(
phytools
)
require
(
geiger
)
require
(
maps
)
library
(
ape
)
library
(
matrixcalc
)
library
(
igraph
)
library
(
ade4
)
library
(
rgl
)
library
(
vegan
)
library
(
ggplot2
)
library
(
smacof
)
#------------------load---------------------------------#
# The leaves are 307 because the variable unassigned was deleted
tree
<-
read.newick
(
file
=
"~/Desktop/preprocessing308/RAxML_bestTree.raxml_ott.tre"
)
is.rooted
(
tree
)
#Add taxonomy
table_correspondence_id_tax
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/table_correspondence_id_tax.txt"
)
hs
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/FBK_thesis/16S/otu_table_16S_L6_commsamp_HS.txt"
)
names
<-
as.data.frame
(
colnames
(
hs
[
-
c
(
1
,
2
)]))
table_correspondence_id_tax
<-
cbind
(
table_correspondence_id_tax
,
names
)
tree
$
tip.label
index
<-
match
(
as.factor
(
tree
$
tip.label
),
table_correspondence_id_tax
[,
1
])
tree
$
tip.label
<-
table_correspondence_id_tax
[
index
,
4
]
write.tree
(
tree
,
file
=
"raxMl_label.tre"
,
append
=
FALSE
)
#----------Patristic Distance between leaves------------#
D
<-
cophenetic
(
tree
)
is.positive.semi.definite
(
D
)
#FALSE
det
(
D
)
#---------MDS PROCEDURE: MARK LAYER AND JOHN A.RHODES----#
#----------Convertion in Euclidean matrix----------------#
D_eu
<-
sqrt
(
D
)
is.positive.semi.definite
(
D_eu
)
is.euclid
(
as.dist
(
D_eu
),
plot
=
FALSE
,
print
=
FALSE
,
tol
=
1e-07
)
isSymmetric
(
D_eu
)
# F <- diag(dim(D)[1])-(1/dim(D)[1])*(matrix(1,dim(D)[1],dim(D)[1]))
# H <- -0.5*F%*%(D)^2%*%(F)
# isSymmetric(H)
# is.positive.semi.definite(round(H,3))
#
#
# # We have to find a matrix X of dimesion (n-1) x n s.t.
# # H=X^tX that is the Cholesky decomposition
# # It is convinient to compute the eigenvalues
#
# P <- eigen(H)
# R <- P$vectors
# Q <- diag(P$values)
#
#
#
# #configuration of points in the a euclidean space of dimension n-1 by col
# X <- (Q[-dim(Q)[1],])^(0.5)%*%t(R) #leave out the last eigenvector that correspond to the eigevalue zero
# h <- t(X)%*%X
#
# colnames(X) <- as.factor(row.names(D))
#
#
# for( i in 1:dim(X)[2]){
# if(colnames(X)[i]%in%annotation[,1]){
# colnames(X)[i] <- as.character(annotation[which(annotation[,1]==colnames(X)[i]),2])
# }
# }
# coordinate <- X
# row.names(X) <- (1:dim(X)[1])
# write.table(coordinate,file="coordinateR307.txt", quote = FALSE, sep="\t" ,row.names=T)
# plot(t(X[1:2,]))
#
#
# plot3d(X[1,], X[2,],X[3,],pch=30,size=5, col= 4)
#
# # --------------------- R package for the MDS---------------------------------#
# library("bios2mds")
# S <- mmds(sqrt(D_ucf),pc=250)
# coordinate_bio <- S$coord # scale factor sqrt(308)*X
# scree.plot(S$eigen, lab = FALSE, title = NULL, xlim = NULL,
# ylim = NULL, new.plot = TRUE, pdf.file = NULL)
#----------------------------------MDS------------------------------------------
X
<-
torgerson
(
D_eu
,
p
=
306
)
#307x306
#order variables as in the dataset
index_2
<-
match
(
table_correspondence_id_tax
[,
4
],
as.data.frame
(
row.names
(
X
))[,
1
])
X
<-
X
[
index_2
,]
colnames
(
X
)
<-
1
:
306
write.table
(
X
,
file
=
"coordinates_307x306.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
T
)
#--------------------- Coordinates for each dataset-------------------------#
#UCf
ucf
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_UCf/Sokol_16S_taxa_HS_UCflare_commsamp_training.txt"
,
stringsAsFactors
=
F
)[,
-1
]
names_ucf
<-
as.data.frame
(
colnames
(
ucf
))
coord_names
<-
as.data.frame
(
row.names
(
X
))
coord_ucf
<-
which
(
names_ucf
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_ucf
[,
1
]
==
F
)
X_ucf
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_ucf
)[
2
]
X_ucf
<-
cbind
(
PCs
,
t
(
X_ucf
))
write.table
(
X_ucf
,
file
=
"coordinates_ucf.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
#---------------------------------UCr--------------------------------------------#
ucr
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_UCr/Sokol_16S_taxa_HS_UCr_commsamp_training.txt"
)[
-1
]
names_ucr
<-
as.data.frame
(
colnames
(
ucr
))
index
<-
which
(
names_ucr
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_ucr
[,
1
]
==
F
)
X_ucr
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_ucr
)[
2
]
X_ucr
<-
cbind
(
PCs
,
t
(
X_ucr
))
write.table
(
X_ucr
,
file
=
"coordinates_ucr.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
#----------------------------------CDf----------------------------------------#
cdf
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_CDf/Sokol_16S_taxa_HS_CDflare_commsamp_training.txt"
)[
-1
]
names_cdf
<-
as.data.frame
(
colnames
(
cdf
))
index
<-
which
(
names_cdf
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_cdf
[,
1
]
==
F
)
X_cdf
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_cdf
)[
2
]
X_cdf
<-
cbind
(
PCs
,
t
(
X_cdf
))
write.table
(
X_cdf
,
file
=
"coordinates_cdf.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
#---------------------------------CDr---------------------------------------------------#
cdr
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_CDr/Sokol_16S_taxa_HS_CDr_commsamp_training.txt"
)[
-1
]
names_cdr
<-
as.data.frame
(
colnames
(
cdr
))
index
<-
which
(
names_cdr
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_cdr
[,
1
]
==
F
)
X_cdr
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_cdr
)[
2
]
X_cdr
<-
cbind
(
PCs
,
t
(
X_cdr
))
write.table
(
X_cdr
,
file
=
"coordinates_cdr.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
#----------------------------------iCDf----------------------------------------#
icdf
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_iCDf/Sokol_16S_taxa_HS_iCDflare_commsamp_training.txt"
)[
-1
]
names_icdf
<-
as.data.frame
(
colnames
(
icdf
))
index
<-
which
(
names_icdf
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_icdf
[,
1
]
==
F
)
X_icdf
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_icdf
)[
2
]
X_icdf
<-
cbind
(
PCs
,
t
(
X_icdf
))
write.table
(
X_icdf
,
file
=
"coordinates_icdf.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
#---------------------------------iCDr---------------------------------------------------#
icdr
<-
read.delim
(
"~/Desktop/ThesisData/progettoR/HS_iCDr/Sokol_16S_taxa_HS_iCDr_commsamp_training.txt"
)[
-1
]
names_icdr
<-
as.data.frame
(
colnames
(
icdr
))
index
<-
which
(
names_icdr
[,
1
]
%in%
coord_names
[,
1
])
#delete the coordinates that are not in the dataset
row_del
<-
which
(
coord_names
[,
1
]
%in%
names_icdr
[,
1
]
==
F
)
X_icdr
<-
X
[
-
row_del
,]
PCs
<-
1
:
dim
(
X_icdr
)[
2
]
X_icdr
<-
cbind
(
PCs
,
t
(
X_icdr
))
write.table
(
X_icdr
,
file
=
"coordinates_icdr.txt"
,
quote
=
FALSE
,
sep
=
"\t"
,
row.names
=
F
)
\ No newline at end of file
R_scripts/sMDS/example1/example1.R
0 → 100644
View file @
9163c31d
This diff is collapsed.
Click to expand it.
R_scripts/sMDS/example1/global_en_example1_300x6.RData
0 → 100644
View file @
9163c31d
File added
R_scripts/sMDS/example2/example2.R
0 → 100644
View file @
9163c31d
################################################################################
# Aims : creation of synthetic data set to validate the sPCA-rSVD method
# fix the first 2 eigenvector, then construction of the covariance matrix. Compare the eigenvector computed
# using sPCA-rSVD with the true one
# selection of the degree of sparsity using the paramenter tuning
#-------------------------------------------------------------------------------
library
(
"pracma"
)
library
(
"cvTools"
)
library
(
"circular"
)
library
(
"TunePareto"
)
library
(
"ggplot2"
)
sPCA_rSVD
<-
function
(
decomp
,
X
,
j_par
,
type
=
c
(
"SCAD"
,
"soft_thr"
,
"hard_thr"
),
tol
=
1e-13
,
max.iter
=
100
)
{
type
<-
match.arg
(
type
)
#decomp <- svd(X)
v_old
<-
decomp
$
v
[,
1
]
u_old
<-
decomp
$
d
[
1
]
*
decomp
$
u
[,
1
]
#cat("#### v_old ", v_old, " ####\n")
if
(
j_par
==
0
)
{
lambda_par
<-
0
}
else
{
Xvold
<-
sort
(
abs
(
X
%*%
v_old
))
lambda_par
<-
(
Xvold
[
j_par
]
+
Xvold
[(
j_par
+1
)])
/
2
}
h_lambda
<-
switch
(
type
,
soft_thr
=
mcia
:::
h_lambda_soft
,
hard_thr
=
mcia
:::
h_lambda_hard
,
SCAD
=
mcia
:::
h_lambda_SCAD
)
#convergence in norm
norm_diffv
<-
norm_diffu
<-
tol
+1
iter
<-
0
while
((
norm_diffv
>=
tol
||
norm_diffu
>=
tol
)
&&
iter
<
max.iter
)
{
iter
<-
iter
+
1
u_new
<-
h_lambda
(
X
%*%
v_old
,
lambda
=
lambda_par
)
v_new
<-
drop
(
t
(
X
)
%*%
u_new
)
v_new
<-
v_new
/
sqrt
(
crossprod
(
v_new
,
v_new
))
norm_diffv
<-
sqrt
(
crossprod
((
v_new
-
v_old
),(
v_new
-
v_old
)))
norm_diffu
<-
sqrt
(
crossprod
((
u_new
-
u_old
),(
u_new
-
u_old
)))
v_old
<-
v_new
u_old
<-
u_new
}
u
<-
u_new
/
sqrt
(
crossprod
(
u_new
,
u_new
))
res
<-
list
(
v
=
drop
(
v_new
),
u
=
drop
(
u
),
convergence
=
iter
<=
max.iter
)
return
(
res
)
}
#-------------------------------------------------------------------------------
#tuning_j_par K-fold CV Tuning j_par parameter selection
# input:
# X ------ matrix to be decomposed using SVD
# ntimes ------ ntimes number of repetition on CV
# nfold ------ nfold cross validation
# opt_penalty ------ type of penalty to select sparsity
#
# output: type numeric
# j_opt ------ optimal degree of sparsity
# scores ------ matrix of CV scores (nrow=p,ncol=N)
#-------------------------------------------------------------------------------
tuning_j_par
<-
function
(
X
,
ntimes
,
nfold
,
type
,
fun
=
median
,
verbose
=
FALSE
,
...
)
{
nr
<-
nrow
(
X
)
#n
nc
<-
ncol
(
X
)
#p
#CV_score_fold <- rep(0, ntimes)
j_opt_cv
<-
rep
(
0
,
ntimes
)
#matrix saving CV_score for each j in each fold
CV_score
<-
c
()
#matrix(0, nrow=nr, ncol=ntimes)
set.seed
(
1234
)
cv_folds
<-
generateCVRuns
(
1
:
nc
,
ntimes
=
ntimes
,
nfold
=
nfold
,
stratified
=
TRUE
)
CV_temp
<-
rep
(
0
,
ntimes
)
for
(
i
in
1
:
ntimes
)
{
if
(
verbose
)
cat
(
"Repetion: "
,
i
,
"\n"
,
sep
=
""
)
cv_folds_current
<-
cv_folds
[[
i
]]
#array with CV score_j for each l-fold
CV_score_j
<-
matrix
(
0
,
nrow
=
nr
,
ncol
=
nfold
)
for
(
l
in
1
:
nfold
)
{
X_whitout_l
<-
X
[,
-
cv_folds_current
[[
l
]],
drop
=
FALSE
]
X_l
<-
X
[,
cv_folds_current
[[
l
]],
drop
=
FALSE
]
ncj
<-
NCOL
(
X_l
)
decomp
<-
svd
(
X_whitout_l
)
for
(
j
in
0
:
(
nr
-1
))
{
if
(
verbose
)
cat
(
"Number of variables"
,
j
,
"\n"
)
sPCA
<-
sPCA_rSVD
(
decomp
,
X
=
X_whitout_l
,
type
=
type
,
j_par
=
j
,
...
)
u_j
<-
sPCA
$
u
v_j
<-
drop
(
t
(
X_l
)
%*%
u_j
)
#compute single CV score,depend on j and the l-fold
CV_score_j
[
j
+1
,
l
]
<-
(
1
/
(
ncj
*
nr
))
*
sum
((
X_l
-
outer
(
u_j
,
v_j
))
^
2
)
#cat("#### CV_score_j ",CV_score_j , " ####\n")
}
}
CV_score
<-
cbind
(
CV_score
,
apply
(
CV_score_j
[,
1
:
nfold
],
1
,
sum
))
#cat("#### CV_score ",CV_score , " ####\n")
}
# save j responsible for min of CV score
goals
<-
apply
(
CV_score
,
1
,
fun
)
j_opt_cv
<-
which.min
(
goals
)
-
1
if
(
j_opt_cv
==
nr
)
cat
(
"the optimal degree is exactly the length of the vector"
)
res
<-
list
(
j
=
j_opt_cv
,
scores
=
CV_score
)
return
(
res
)
}
# function to create data X from eigenvector and eigenvalues
create_X
<-
function
(
v1
,
v2
,
C
,
n
=
100
,
tol
=
1e-13
,
m
=
1
){
p
<-
length
(
v1
)
# normalize eigenvector
v1
<-
v1
/
sqrt
(
sum
(
v1
*
v1
))
v2
<-
v2
/
sqrt
(
sum
(
v2
*
v2
))
V
<-
matrix
(
0
,
ncol
=
p
,
nrow
=
p
)
V
[,
1
:
2
]
<-
c
(
v1
,
v2
)
# creation of the other 8 eigenvector
for
(
j
in
3
:
ncol
(
V
)){
set.seed
(
j
+
m
)
#fissare la V?
V
[,
j
]
<-
runif
(
p
)
}
# if V is not full rank, recompute it
if
(
det
(
V
)
<
tol
){
for
(
j
in
3
:
ncol
(
V
)){
set.seed
(
j
+1234
+
m
)
V
[,
j
]
<-
runif
(
p
)
}
}
GS_orth
<-
gramSchmidt
(
V
,
tol
=
tol
)
# V = Q %*% R
V
<-
GS_orth
$
Q
#orthogonal matrix
# Z ~ N(0,I_P)
# set.seed(m)
# Z <- matrix(rnorm(n*p), nrow = p, ncol = n)
Z
<-
matrix
(
0
,
nrow
=
p
,
ncol
=
n
)
for
(
j
in
1
:
n
){
set.seed
(
j
+
m
)
Z
[,
j
]
<-
rnorm
(
p
)
}
# generation of the data
X
<-
V
%*%
diag
(
sqrt
(
C
))
%*%
Z
#p x n
return
(
X
)
}
# test sPCA with known degree of sparsity
# generation of l = 100 datasets
l
<-
100
opt_penalty
=
"SCAD"
sPCA_oracle_method
<-
function
(
v1
,
v2
,
C
,
l
=
100
,
opt_penalty
){
p
<-
length
(
v1
)
j1_known
<-
length
(
which
(
v1
==
0
))
j2_known
<-
length
(
which
(
v2
==
0
))
v1_spca
<-
list
()
v2_spca
<-
list
()
for
(
m
in
1
:
l
){
X_temp
<-
create_X
(
v1
,
v2
,
C
,
m
=
m
)
#each matrix is p x n
#component 1
decomp
<-
svd
(
X_temp
)
sPCA_1
<-
sPCA_rSVD
(
decomp
,
X
=
X_temp
,
j_par
=
j1_known
,
type
=
opt_penalty
)
v1_spca
[[
m
]]
<-
sPCA_1
$
u
X_1
<-
X_temp
%*%
tcrossprod
(
sPCA_1
$
v
,
sPCA_1
$
v
)
#cumulative percentage of variance (CPEV)
cum_perc_1
<-
sum
(
diag
(
tcrossprod
(
X_1
,
X_1
)))
/
sum
(
diag
(
tcrossprod
(
X_temp
,
X_temp
)))
lambda_1
<-
cum_perc_1
#component 2
X_new_temp
<-
X_temp
-
sqrt
(
lambda_1
*
sum
(
diag
(
t
(
X_temp
)
%*%
X_temp
)))
*
sPCA_1
$
u
%*%
t
(
sPCA_1
$
v
)
decomp
<-
svd
(
X_new_temp
)
sPCA_2
<-
sPCA_1
<-
sPCA_rSVD
(
decomp
,
X
=
X_new_temp
,
j_par
=
j2_known
,
type
=
opt_penalty
)
v2_spca
[[
m
]]
<-
sPCA_2
$
u
cat
(
m
,
"\n"
)
}
return
(
list
(
v1
=
v1_spca
,
v2
=
v2_spca
))
}
sPCA_CV_method
<-
function
(
v1
,
v2
,
C
,
l
=
100
,
opt_penalty
,
N
){
p
<-
length
(
v1
)
v1_spca
<-
list
()
v2_spca
<-
list
()
for
(
m
in
1
:
l
){
X_temp
<-
create_X
(
v1
,
v2
,
C
,
m
=
m
)
#each matrix is p x n
#solution order 1
j1_opt
<-
tuning_j_par
(
X_temp
,
ntimes
=
5
,
nfold
=
5
,
type
=
opt_penalty
,
fun
=
median
,
verbose
=
FALSE
)
$
j
decomp
<-
svd
(
X_temp
)
sPCA_1
<-
sPCA_rSVD
(
decomp
,
X
=
X_temp
,
j_par
=
j1_opt
,
type
=
opt_penalty
)
v1_spca
[[
m
]]
<-
sPCA_1
$
u
X_1
<-
X_temp
%*%
tcrossprod
(
sPCA_1
$
v
,
sPCA_1
$
v
)
#cumulative percentage of variance (CPEV)
cum_perc_1
<-
sum
(
diag
(
tcrossprod
(
X_1
,
X_1
)))
/
sum
(
diag
(
tcrossprod
(
X_temp
,
X_temp
)))
lambda_1
<-
cum_perc_1
#solution order 2
X_new_temp
<-
X_temp
-
sqrt
(
lambda_1
*
sum
(
diag
(
t
(
X_temp
)
%*%
X_temp
)))
*
sPCA_1
$
u
%*%
t
(
sPCA_1
$
v
)
j2_opt
<-
tuning_j_par
(
X_new_temp
,
ntimes
=
10
,
nfold
=
5
,
type
=
opt_penalty
,
fun
=
median
,
verbose
=
FALSE
)
$
j
decomp
<-
svd
(
X_new_temp
)
sPCA_2
<-
sPCA_rSVD
(
decomp
,
X
=
X_new_temp
,
j_par
=
j2_opt
,
type
=
opt_penalty
)
v2_spca
[[
m
]]
<-
sPCA_2
$
u
cat
(
m
,
"\n"
)
}
return
(
list
(
v1
=
v1_spca
,
j1
=
j1_opt
,
v2
=
v2_spca
,
j2
=
j2_opt
))
}
#compute the median angle
# v_true: vector of the true eigenvector
# v_spca: list of vector, one vector for each simulation (100)
compare_eig
<-
function
(
v_true
,
v_spca
){
v_true
<-
v_true
/
sqrt
(
sum
(
v_true
^
2
))
M
<-
length
(
v_spca
)
# sgn_true <- sign(v_true)
# create list v_spca with signs according to v_true sign
# v_spca_sgn <- list()
# for (h in 1:M){
# sgn_h <- sign(v_spca[[h]])
# if (any(abs(sgn_true-sgn_h)==2)){
# v_spca_sgn[[h]] <- v_spca[[h]]*(-1)
# }
# else v_spca_sgn[[h]] <- v_spca[[h]]
# }
#
# # angle between the 2 vectors
# angle <- vector(length = M)
# for (h in 1:M)
# angle[h] <- acos(sum(v_true * t(v_spca_sgn[[h]])) / (sqrt(sum(v_true^2)) * sqrt(sum(v_spca_sgn[[h]]^2))))
# angle between the 2 vectors
angle
<-
vector
(
length
=
M
)
for
(
h
in
1
:
M
){
true_angle
<-
acos
(
sum
(
v_true
*
t
(
v_spca
[[
h
]]))
/
(
sqrt
(
sum
(
v_true
^
2
))
*
sqrt
(
sum
(
v_spca
[[
h
]]
^
2
))))
angle
[
h
]
<-
min
(
abs
(
pi
-
true_angle
),
abs
(
true_angle
))
}
median_angle
<-
median
(
circular
(
x
=
angle
))
#percentage of correctly identified zero loadings
correct
<-
vector
(
length
=
M
)
correct_perc
<-
vector
()
#percentage of incorrectly identified zero loadings
incorrect
<-
vector
(
length
=
M
)
incorrect_perc
<-
vector
()
#find zero loadings in v_true
zero_true
<-
which
(
v_true
==
0
)
n_zero_true
<-
length
(
zero_true
)
not_zero_true
<-
which
(
v_true
!=
0
)
n_not_zero_true
<-
length
(
not_zero_true
)
for
(
h
in
1
:
M
)
{
zero_spca_temp
<-
which
(
v_spca
[[
h
]]
==
0
)
not_zero_spca_temp
<-
which
(
v_spca
[[
h
]]
!=
0
)
#number of zero loadings in v_true found also in v_spca
correct
[
h
]
<-
length
(
intersect
(
zero_true
,
zero_spca_temp
))
/
n_zero_true
correct_perc
[
h
]
<-
100
*
correct
[
h
]
#number of loadings set to zero in v_spca but not zero in v_true