[Vinecopula-commits] r120 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Do Aug 20 17:04:09 CEST 2015
Author: tnagler
Date: 2015-08-20 17:04:08 +0200 (Thu, 20 Aug 2015)
New Revision: 120
Modified:
pkg/R/RVineStructureSelect2.R
Log:
* adjust own MST algorithm for full backwards compatibility
Modified: pkg/R/RVineStructureSelect2.R
===================================================================
--- pkg/R/RVineStructureSelect2.R 2015-08-20 13:41:07 UTC (rev 119)
+++ pkg/R/RVineStructureSelect2.R 2015-08-20 15:04:08 UTC (rev 120)
@@ -1,7 +1,7 @@
RVineStructureSelect2 <- function(data, familyset = NA, type = 0, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, trunclevel = NA, progress = FALSE, weights = NA, rotations = TRUE) {
d <- ncol(data)
n <- nrow(data)
-
+
## sanity checks
if (type == 0)
type <- "RVine" else if (type == 1)
@@ -26,27 +26,27 @@
stop("Selection criterion not implemented.")
if (level < 0 & level > 1)
stop("Significance level has to be between 0 and 1.")
-
+
## set variable names and trunclevel if not provided
if (is.null(colnames(data)))
colnames(data) <- paste0("V", 1:d)
if (is.na(trunclevel))
trunclevel <- d
-
+
## adjust familyset
if (trunclevel == 0)
familyset <- 0
if (rotations)
familyset <- with_rotations(familyset)
-
+
## initialize object for results
RVine <- list(Tree = NULL, Graph = NULL)
-
+
## estimation in first tree ----------------------------
# find optimal tree
g <- initializeFirstGraph2(data, weights)
- MST <- findMaximumTauTree2(g,mode = type)
-
+ MST <- findMaximumTauTree2(g, mode = type)
+
# estimate pair-copulas
VineTree <- fit.FirstTreeCopulas2(MST,
data,
@@ -59,7 +59,7 @@
RVine$Tree[[1]] <- VineTree
RVine$Graph[[1]] <- g
oldVineGraph <- VineTree
-
+
## estimation in higher trees --------------------------
for (i in 2:(d - 1)) {
# only estimate pair-copulas if not truncated
@@ -81,7 +81,7 @@
RVine$Tree[[i]] <- VineTree
RVine$Graph[[i]] <- g
}
-
+
## free memory and return results as 'RVineMatrix' object
.RVine <- RVine
rm(list = ls())
@@ -91,7 +91,7 @@
initializeFirstGraph2 <- function(data.univ, weights) {
## calculate Kendall's tau
taus <- TauMatrix(data = data.univ, weights = weights)
-
+
## return full graph with tau as weights
graphFromTauMatrix(taus)
}
@@ -100,53 +100,60 @@
## construct adjency matrix
A <- adjacencyMatrix(g)
d <- ncol(A)
-
+
if (mode == "RVine") {
## initialize
tree <- NULL
edges <- matrix(NA, d - 1, 2)
w <- numeric(d - 1)
i <- 1
-
+
## construct minimum spanning tree
for (k in 1:(d - 1)) {
# add selected edge to tree
tree <- c(tree, i)
-
+
# find edge with minimal weight
m <- apply(as.matrix(A[, tree]), 2, min)
- a <- apply(as.matrix(A[, tree]), 2, function(x) order(rank(x)))[1, ]
- b <- apply(as.matrix(m), 2, function(x) order(rank(x)))[1]
+ cnt <- sum(m == min(m)) # count ties
+ a <- apply(as.matrix(A[, tree]), 2,
+ function(x) order(rank(x)))[1, ]
+ b <- order(rank(m))[cnt]
j <- tree[b]
i <- a[b]
-
+
# store edge and weight
edges[k, ] <- c(j, i)
w[k] <- A[i, j]
-
+
## adjust adjecency matrix to prevent loops
for (t in tree)
A[i, t] <- A[t, i] <- Inf
}
-
+
## reorder edges for backwads compatibility with igraph output
edges <- t(apply(edges, 1, function(x) sort(x)))
edges <- edges[order(edges[, 2], edges[, 1]), ]
-
+
## delete unused edges from graph
E <- g$E$nums
in.tree <- apply(matrix(edges, ncol = 2), 1,
function(x) which((x[1] == E[, 1]) & (x[2] == E[, 2])))
+ if (is.list(in.tree))
+ do.call()
+ MST <- g
g$E$todel <- rep(TRUE, nrow(E))
- g$E$todel[in.tree] <- FALSE
- MST <- deleteEdges(g)
+ if (any(g$E$todel)) {
+ g$E$todel[in.tree] <- FALSE
+ MST <- deleteEdges(g)
+ }
} else if (mode == "CVine") {
## set root as vertex with minimal sum of weights
A <- adjacencyMatrix(g)
diag(A) <- 0
sumtaus <- rowSums(A)
root <- which.min(sumtaus)
-
+
## delete unused edges
g$E$todel <- !((g$E$nums[, 2] == root) | (g$E$nums[, 1] == root))
MST <- g
@@ -155,7 +162,7 @@
} else {
stop("vine not implemented")
}
-
+
## return result
MST
}
@@ -190,11 +197,11 @@
## fit pair-copulas for the first vine tree
fit.FirstTreeCopulas2 <- function(MST, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
-
+
## initialize estimation results with empty list
d <- nrow(MST$E$nums)
parameterForACopula <- lapply(1:d, function(i) NULL)
-
+
## prepare for estimation and store names
for (i in 1:d) {
## get edge and corresponding data
@@ -203,7 +210,7 @@
parameterForACopula[[i]]$zr2 <- data.univ[, a[2]]
MST$E$Copula.Data.1[i] <- list(data.univ[, a[1]])
MST$E$Copula.Data.2[i] <- list(data.univ[, a[2]])
-
+
## set names for this edge
if (is.null(MST$V$names[a[1]])) {
MST$E$Copula.CondName.1[i] <- a[1]
@@ -223,7 +230,7 @@
sep = " , ")
}
}
-
+
## estimate parameters and select family
outForACopula <- lapply(X = parameterForACopula,
FUN = wrapper_fit.ACopula,
@@ -232,36 +239,36 @@
testForIndependence,
testForIndependence.level,
weights)
-
+
## store estimated model and pseudo-obversations for next tree
for (i in 1:d) {
MST$E$Copula.param[[i]] <- c(outForACopula[[i]]$par,
outForACopula[[i]]$par2)
MST$E$Copula.type[i] <- outForACopula[[i]]$family
MST$E$Copula.out[i] <- list(outForACopula[[i]])
-
+
MST$E$Copula.CondData.1[i] <- list(outForACopula[[i]]$CondOn.1)
MST$E$Copula.CondData.2[i] <- list(outForACopula[[i]]$CondOn.2)
}
-
+
## return results
MST
}
## fit pair-copulas for vine trees 2,...
fit.TreeCopulas2 <- function(MST, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
-
+
## initialize estimation results with empty list
d <- nrow(MST$E$nums)
parameterForACopula <- lapply(1:d, function(i) NULL)
-
-
+
+
## prepare for estimation
for (i in 1:d) {
## get edge and corresponding data
con <- MST$E$nums[i, ]
temp <- oldVineGraph$E$nums[con, ]
-
+
## fetch corresponding data and names
if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
same <- temp[2, 1]
@@ -270,7 +277,7 @@
same <- temp[2, 2]
}
}
-
+
if (temp[1, 1] == same) {
zr1 <- oldVineGraph$E$Copula.CondData.2[con[1]]
n1 <- oldVineGraph$E$Copula.CondName.2[con[1]]
@@ -285,7 +292,7 @@
zr2 <- oldVineGraph$E$Copula.CondData.1[con[2]]
n2 <- oldVineGraph$E$Copula.CondName.1[con[2]]
}
-
+
if (is.list(zr1)) {
zr1a <- as.vector(zr1[[1]])
zr2a <- as.vector(zr2[[1]])
@@ -297,20 +304,20 @@
n1a <- n1
n2a <- n2
}
-
+
if (progress == TRUE)
message(n1a, " + ", n2a, " --> ", MST$E$names[i])
-
+
parameterForACopula[[i]]$zr1 <- zr1a
parameterForACopula[[i]]$zr2 <- zr2a
-
+
MST$E$Copula.Data.1[i] <- list(zr1a)
MST$E$Copula.Data.2[i] <- list(zr2a)
-
+
MST$E$Copula.CondName.1[i] <- n1a
MST$E$Copula.CondName.2[i] <- n2a
}
-
+
## estimate parameters and select family
outForACopula <- lapply(X = parameterForACopula,
FUN = wrapper_fit.ACopula,
@@ -319,58 +326,58 @@
testForIndependence,
testForIndependence.level,
weights)
-
+
## store estimated model and pseudo-obversations for next tree
for (i in 1:d) {
MST$E$Copula.param[[i]] <- c(outForACopula[[i]]$par,
outForACopula[[i]]$par2)
MST$E$Copula.type[i] <- outForACopula[[i]]$family
MST$E$Copula.out[i] <- list(outForACopula[[i]])
-
+
MST$E$Copula.CondData.1[i] <- list(outForACopula[[i]]$CondOn.1)
MST$E$Copula.CondData.2[i] <- list(outForACopula[[i]]$CondOn.2)
}
-
+
## return results
MST
}
## initialize graph for next vine tree (possible edges)
buildNextGraph2 <- function(oldVineGraph, weights = NA) {
-
+
d <- nrow(oldVineGraph$E$nums)
-
+
## initialize with full graph
g <- makeFullGraph(d)
g$V$names <- oldVineGraph$E$names
g$V$conditionedSet <- oldVineGraph$E$conditionedSet
g$V$conditioningSet <- oldVineGraph$E$conditioningSet
-
+
## get info for all edges
out <- lapply(1:nrow(g$E$nums),
getEdgeInfo2,
g = g,
oldVineGraph = oldVineGraph,
weights = weights)
-
+
## annotate graph (same order as in old version of this function)
g$E$weights <- sapply(out, function(x) x$tau)
g$E$names <- sapply(out, function(x) x$name)
g$E$conditionedSet <- lapply(out, function(x) x$nedSet)
g$E$conditioningSet <- lapply(out, function(x) x$ningSet)
g$E$todel <- sapply(out, function(x) x$todel)
-
+
## delete edges that are prohibited by the proximity condition
deleteEdges(g)
}
## function for obtaining edge information
getEdgeInfo2 <- function(i, g, oldVineGraph, weights) {
-
+
## get edge
con <- g$E$nums[i, ]
temp <- oldVineGraph$E$nums[con, ]
-
+
## check for proximity condition
ok <- FALSE
if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
@@ -382,11 +389,11 @@
same <- temp[2, 2]
}
}
-
+
## dummy output
tau <- nedSet <- ningSet <- name <- NA
todel <- TRUE
-
+
# info if proximity condition is fulfilled ...
if (ok) {
## get data
@@ -407,15 +414,15 @@
zr1a <- zr1
zr2a <- zr2
}
-
+
## calculate Kendall's tau
keine_nas <- !(is.na(zr1a) | is.na(zr2a))
tau <- fasttau(zr1a[keine_nas], zr2a[keine_nas], weights)
-
+
## get names
name.node1 <- strsplit(g$V$names[con[1]], split = " *[,;] *")[[1]]
name.node2 <- strsplit(g$V$names[con[2]], split = " *[,;] *")[[1]]
-
+
## infer conditioned set and conditioning set
l1 <- c(g$V$conditionedSet[[con[1]]],
g$V$conditioningSet[[con[1]]])
@@ -423,7 +430,7 @@
g$V$conditioningSet[[con[2]]])
nedSet <- c(setdiff(l1, l2), setdiff(l2, l1))
ningSet <- intersect(l1, l2)
-
+
## set edge name
nmdiff <- c(setdiff(name.node1, name.node2),
setdiff(name.node2, name.node1))
@@ -431,11 +438,11 @@
name <- paste(paste(nmdiff, collapse = ","),
paste(nmsect, collapse = ","),
sep = " ; ")
-
+
## mark as ok
todel <- FALSE
}
-
+
## return edge information
list(tau = tau,
nedSet = nedSet,
@@ -455,7 +462,7 @@
## bivariate copula selection
fit.ACopula <- function(u1, u2, familyset = NA, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, weights = NA) {
-
+
## select family and estimate parameter(s) for the pair copula
out <- BiCopSelect(u1, u2,
familyset,
@@ -464,21 +471,21 @@
level,
weights = weights,
rotations = FALSE)
-
+
## change rotation if family is not symmetric wrt the main diagonal
if (out$family %in% c(23, 24, 26:30, 124, 224)) {
out$family <- out$family + 10
} else if (out$family %in% c(33, 34, 36:40, 134, 234)) {
out$family <- out$family - 10
}
-
+
## tawn copulas also have to change type
if (out$family%/%100 == 1) {
out$family <- out$family + 100
} else if (out$family%/%200 == 1) {
out$family <- out$family - 100
}
-
+
## store pseudo-observations for estimation in next tree
out$CondOn.1 <- .C("Hfunc1",
as.integer(out$family),
@@ -498,14 +505,14 @@
as.double(out$par2),
as.double(rep(0, length(u1))),
PACKAGE = "VineCopula")[[7]]
-
+
## return results
out
}
## build R-Vine matrix object based on nested set of trees
as.RVM2 <- function(RVine) {
-
+
## initialize objects
n <- length(RVine$Tree) + 1
con <- list()
@@ -513,7 +520,7 @@
nedSets <- list()
crspParams <- list()
crspTypes <- list()
-
+
## get selected pairs, families and estimated parameters
for (k in 1:(n - 2)) {
nedSets[[k]] <- RVine$Tree[[k]]$E$conditionedSet
@@ -527,24 +534,24 @@
} else {
nedSets[[n - 1]] <- list(RVine$Tree[[n - 1]]$E$conditionedSet)
}
-
+
## initialize matrices for RVineMatrix object
Param <- array(dim = c(n, n))
Params2 <- array(0, dim = c(n, n))
Type <- array(dim = c(n, n))
M <- matrix(NA, n, n)
-
+
## store structure, families and parameters in matrices
for (k in 1:(n - 1)) {
w <- nedSets[[n - k]][[1]][1]
-
+
M[k, k] <- w
M[(k + 1), k] <- nedSets[[n - k]][[1]][2]
-
+
Param[(k + 1), k] <- crspParams[[n - k]][[1]][1]
Params2[(k + 1), k] <- crspParams[[n - k]][[1]][2]
Type[(k + 1), k] <- crspTypes[[n - k]][[1]]
-
+
if (k == (n - 1)) {
M[(k + 1), (k + 1)] <- nedSets[[n - k]][[1]][2]
} else {
@@ -582,11 +589,11 @@
}
}
}
-
+
## clean NAs
M[is.na(M)] <- 0
Type[is.na(Type)] <- 0
-
+
## return RVineMatrix object
RVineMatrix(M, family = Type, par = Param, par2 = Params2, names = nam)
}
@@ -604,7 +611,7 @@
E.names <- apply(E, 1, function(x) paste(nms[x[1]], nms[x[2]], sep = ","))
# set weights
w <- tau[upper.tri(tau)]
-
+
## return results
list(V = list(names = nms,
conditionedSet = NULL,
@@ -621,7 +628,7 @@
E <- cbind(do.call(c, lapply(1:(d-1), function(i) rep(i, d-i))),
do.call(c, lapply(1:(d-1), function(i) (i+1):d)))
E <- matrix(E, ncol = 2)
-
+
## output dummy list with edges set
list(V = list(names = NULL,
conditionedSet = NULL,
@@ -638,23 +645,23 @@
d <- length(g$V$names)
v.all <- cbind(do.call(c, lapply(1:(d-1), function(i) seq.int(i))),
do.call(c, lapply(1:(d-1), function(i) rep(i+1, i))))
-
+
## fnd weight
vals <- apply(v.all, 1, set_weight, E = g$E)
-
+
## create symmetric matrix of weights
M <- matrix(0, d, d)
M[upper.tri(M)] <- vals
M <- M + t(M)
diag(M) <- Inf
-
+
## return final matrix
M
}
set_weight <- function(x, E) {
is.edge <- (x[1] == E$nums[, 1]) & (x[2] == E$nums[, 2])
- if (!any(is.edge)) 1 else (1 - abs(E$weights[which(is.edge)]))
+ if (!any(is.edge)) Inf else (1 - abs(E$weights[which(is.edge)]))
}
Mehr Informationen über die Mailingliste Vinecopula-commits