[Vinecopula-commits] r120 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Do Aug 20 17:04:09 CEST 2015


Author: tnagler
Date: 2015-08-20 17:04:08 +0200 (Thu, 20 Aug 2015)
New Revision: 120

Modified:
   pkg/R/RVineStructureSelect2.R
Log:
* adjust own MST algorithm for full backwards compatibility

Modified: pkg/R/RVineStructureSelect2.R
===================================================================
--- pkg/R/RVineStructureSelect2.R	2015-08-20 13:41:07 UTC (rev 119)
+++ pkg/R/RVineStructureSelect2.R	2015-08-20 15:04:08 UTC (rev 120)
@@ -1,7 +1,7 @@
 RVineStructureSelect2 <- function(data, familyset = NA, type = 0, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, trunclevel = NA, progress = FALSE,  weights = NA, rotations = TRUE) {
     d <- ncol(data)
     n <- nrow(data)
-
+    
     ## sanity checks
     if (type == 0)
         type <- "RVine" else if (type == 1)
@@ -26,27 +26,27 @@
         stop("Selection criterion not implemented.")
     if (level < 0 & level > 1)
         stop("Significance level has to be between 0 and 1.")
-
+    
     ## set variable names and trunclevel if not provided
     if (is.null(colnames(data)))
         colnames(data) <- paste0("V", 1:d)
     if (is.na(trunclevel))
         trunclevel <- d
-
+    
     ## adjust familyset
     if (trunclevel == 0)
         familyset <- 0
     if (rotations)
         familyset <- with_rotations(familyset)
-
+    
     ## initialize object for results
     RVine <- list(Tree = NULL, Graph = NULL)
-
+    
     ## estimation in first tree ----------------------------
     # find optimal tree
     g <- initializeFirstGraph2(data, weights)
-    MST <- findMaximumTauTree2(g,mode = type)
-
+    MST <- findMaximumTauTree2(g, mode = type)
+    
     # estimate pair-copulas
     VineTree <- fit.FirstTreeCopulas2(MST,
                                       data,
@@ -59,7 +59,7 @@
     RVine$Tree[[1]] <- VineTree
     RVine$Graph[[1]] <- g
     oldVineGraph <- VineTree
-
+    
     ## estimation in higher trees --------------------------
     for (i in 2:(d - 1)) {
         # only estimate pair-copulas if not truncated
@@ -81,7 +81,7 @@
         RVine$Tree[[i]] <- VineTree
         RVine$Graph[[i]] <- g
     }
-
+    
     ## free memory and return results as 'RVineMatrix' object
     .RVine <- RVine
     rm(list = ls())
@@ -91,7 +91,7 @@
 initializeFirstGraph2 <- function(data.univ, weights) {
     ## calculate Kendall's tau
     taus <- TauMatrix(data = data.univ, weights = weights)
-
+    
     ## return full graph with tau as weights
     graphFromTauMatrix(taus)
 }
@@ -100,53 +100,60 @@
     ## construct adjency matrix
     A <- adjacencyMatrix(g)
     d <- ncol(A)
-
+    
     if (mode == "RVine") {
         ## initialize
         tree <- NULL
         edges <- matrix(NA, d - 1, 2)
         w <- numeric(d - 1)
         i <- 1
-
+        
         ## construct minimum spanning tree
         for (k in 1:(d - 1)) {
             # add selected edge to tree
             tree <- c(tree, i)
-
+            
             # find edge with minimal weight
             m <- apply(as.matrix(A[, tree]), 2, min)
-            a <- apply(as.matrix(A[, tree]), 2, function(x) order(rank(x)))[1, ]
-            b <- apply(as.matrix(m), 2, function(x) order(rank(x)))[1]
+            cnt <- sum(m == min(m))  # count ties
+            a <- apply(as.matrix(A[, tree]), 2, 
+                       function(x) order(rank(x)))[1, ]
+            b <- order(rank(m))[cnt]
             j <- tree[b]
             i <- a[b]
-
+            
             # store edge and weight
             edges[k, ] <- c(j, i)
             w[k] <- A[i, j]
-
+            
             ## adjust adjecency matrix to prevent loops
             for (t in tree)
                 A[i, t] <- A[t, i] <- Inf
         }
-
+        
         ## reorder edges for backwads compatibility with igraph output
         edges <- t(apply(edges, 1, function(x) sort(x)))
         edges <- edges[order(edges[, 2], edges[, 1]), ]
-
+        
         ## delete unused edges from graph
         E <- g$E$nums
         in.tree <- apply(matrix(edges, ncol = 2), 1,
                          function(x) which((x[1] == E[, 1]) & (x[2] == E[, 2])))
+        if (is.list(in.tree))
+            do.call()
+        MST <- g
         g$E$todel <- rep(TRUE, nrow(E))
-        g$E$todel[in.tree] <- FALSE
-        MST <- deleteEdges(g)
+        if (any(g$E$todel)) {
+            g$E$todel[in.tree] <- FALSE
+            MST <- deleteEdges(g)
+        }
     } else if (mode  == "CVine") {
         ## set root as vertex with minimal sum of weights
         A <- adjacencyMatrix(g)
         diag(A) <- 0
         sumtaus <- rowSums(A)
         root <- which.min(sumtaus)
-
+        
         ## delete unused edges
         g$E$todel <- !((g$E$nums[, 2] == root) | (g$E$nums[, 1] == root))
         MST <- g
@@ -155,7 +162,7 @@
     } else {
         stop("vine not implemented")
     }
-
+    
     ## return result
     MST
 }
@@ -190,11 +197,11 @@
 
 ## fit pair-copulas for the first vine tree
 fit.FirstTreeCopulas2 <- function(MST, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
-
+    
     ## initialize estimation results with empty list
     d <- nrow(MST$E$nums)
     parameterForACopula <- lapply(1:d, function(i) NULL)
-
+    
     ## prepare for estimation and store names
     for (i in 1:d) {
         ## get edge and corresponding data
@@ -203,7 +210,7 @@
         parameterForACopula[[i]]$zr2 <- data.univ[, a[2]]
         MST$E$Copula.Data.1[i] <- list(data.univ[, a[1]])
         MST$E$Copula.Data.2[i] <- list(data.univ[, a[2]])
-
+        
         ## set names for this edge
         if (is.null(MST$V$names[a[1]])) {
             MST$E$Copula.CondName.1[i] <- a[1]
@@ -223,7 +230,7 @@
                                           sep = " , ")
         }
     }
-
+    
     ## estimate parameters and select family
     outForACopula <- lapply(X = parameterForACopula,
                             FUN = wrapper_fit.ACopula,
@@ -232,36 +239,36 @@
                             testForIndependence,
                             testForIndependence.level,
                             weights)
-
+    
     ## store estimated model and pseudo-obversations for next tree
     for (i in 1:d) {
         MST$E$Copula.param[[i]] <- c(outForACopula[[i]]$par,
                                      outForACopula[[i]]$par2)
         MST$E$Copula.type[i] <- outForACopula[[i]]$family
         MST$E$Copula.out[i] <- list(outForACopula[[i]])
-
+        
         MST$E$Copula.CondData.1[i] <- list(outForACopula[[i]]$CondOn.1)
         MST$E$Copula.CondData.2[i] <- list(outForACopula[[i]]$CondOn.2)
     }
-
+    
     ## return results
     MST
 }
 
 ## fit pair-copulas for vine trees 2,...
 fit.TreeCopulas2 <- function(MST, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
-
+    
     ## initialize estimation results with empty list
     d <- nrow(MST$E$nums)
     parameterForACopula <- lapply(1:d, function(i) NULL)
-
-
+    
+    
     ## prepare for estimation
     for (i in 1:d) {
         ## get edge and corresponding data
         con <- MST$E$nums[i, ]
         temp <- oldVineGraph$E$nums[con, ]
-
+        
         ## fetch corresponding data and names
         if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
             same <- temp[2, 1]
@@ -270,7 +277,7 @@
                 same <- temp[2, 2]
             }
         }
-
+        
         if (temp[1, 1] == same) {
             zr1 <- oldVineGraph$E$Copula.CondData.2[con[1]]
             n1  <- oldVineGraph$E$Copula.CondName.2[con[1]]
@@ -285,7 +292,7 @@
             zr2 <- oldVineGraph$E$Copula.CondData.1[con[2]]
             n2  <- oldVineGraph$E$Copula.CondName.1[con[2]]
         }
-
+        
         if (is.list(zr1)) {
             zr1a <- as.vector(zr1[[1]])
             zr2a <- as.vector(zr2[[1]])
@@ -297,20 +304,20 @@
             n1a <- n1
             n2a <- n2
         }
-
+        
         if (progress == TRUE)
             message(n1a, " + ", n2a, " --> ", MST$E$names[i])
-
+        
         parameterForACopula[[i]]$zr1 <- zr1a
         parameterForACopula[[i]]$zr2 <- zr2a
-
+        
         MST$E$Copula.Data.1[i] <- list(zr1a)
         MST$E$Copula.Data.2[i] <- list(zr2a)
-
+        
         MST$E$Copula.CondName.1[i] <- n1a
         MST$E$Copula.CondName.2[i] <- n2a
     }
-
+    
     ## estimate parameters and select family
     outForACopula <- lapply(X = parameterForACopula,
                             FUN = wrapper_fit.ACopula,
@@ -319,58 +326,58 @@
                             testForIndependence,
                             testForIndependence.level,
                             weights)
-
+    
     ## store estimated model and pseudo-obversations for next tree
     for (i in 1:d) {
         MST$E$Copula.param[[i]] <- c(outForACopula[[i]]$par,
                                      outForACopula[[i]]$par2)
         MST$E$Copula.type[i] <- outForACopula[[i]]$family
         MST$E$Copula.out[i] <- list(outForACopula[[i]])
-
+        
         MST$E$Copula.CondData.1[i] <- list(outForACopula[[i]]$CondOn.1)
         MST$E$Copula.CondData.2[i] <- list(outForACopula[[i]]$CondOn.2)
     }
-
+    
     ## return results
     MST
 }
 
 ## initialize graph for next vine tree (possible edges)
 buildNextGraph2 <- function(oldVineGraph, weights = NA) {
-
+    
     d <- nrow(oldVineGraph$E$nums)
-
+    
     ## initialize with full graph
     g <- makeFullGraph(d)
     g$V$names <- oldVineGraph$E$names
     g$V$conditionedSet <- oldVineGraph$E$conditionedSet
     g$V$conditioningSet <- oldVineGraph$E$conditioningSet
-
+    
     ## get info for all edges
     out <- lapply(1:nrow(g$E$nums),
                   getEdgeInfo2,
                   g = g,
                   oldVineGraph = oldVineGraph,
                   weights = weights)
-
+    
     ## annotate graph (same order as in old version of this function)
     g$E$weights         <- sapply(out, function(x) x$tau)
     g$E$names           <- sapply(out, function(x) x$name)
     g$E$conditionedSet  <- lapply(out, function(x) x$nedSet)
     g$E$conditioningSet <- lapply(out, function(x) x$ningSet)
     g$E$todel           <- sapply(out, function(x) x$todel)
-
+    
     ## delete edges that are prohibited by the proximity condition
     deleteEdges(g)
 }
 
 ## function for obtaining edge information
 getEdgeInfo2 <- function(i, g, oldVineGraph, weights) {
-
+    
     ## get edge
     con <- g$E$nums[i, ]
     temp <- oldVineGraph$E$nums[con, ]
-
+    
     ## check for proximity condition
     ok <- FALSE
     if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
@@ -382,11 +389,11 @@
             same <- temp[2, 2]
         }
     }
-
+    
     ## dummy output
     tau <- nedSet <- ningSet <- name <- NA
     todel <- TRUE
-
+    
     # info if proximity condition is fulfilled ...
     if (ok) {
         ## get data
@@ -407,15 +414,15 @@
             zr1a <- zr1
             zr2a <- zr2
         }
-
+        
         ## calculate Kendall's tau
         keine_nas <- !(is.na(zr1a) | is.na(zr2a))
         tau <- fasttau(zr1a[keine_nas], zr2a[keine_nas], weights)
-
+        
         ## get names
         name.node1 <- strsplit(g$V$names[con[1]], split = " *[,;] *")[[1]]
         name.node2 <- strsplit(g$V$names[con[2]], split = " *[,;] *")[[1]]
-
+        
         ## infer conditioned set and conditioning set
         l1 <- c(g$V$conditionedSet[[con[1]]],
                 g$V$conditioningSet[[con[1]]])
@@ -423,7 +430,7 @@
                 g$V$conditioningSet[[con[2]]])
         nedSet <- c(setdiff(l1, l2), setdiff(l2, l1))
         ningSet <- intersect(l1, l2)
-
+        
         ## set edge name
         nmdiff <- c(setdiff(name.node1, name.node2),
                     setdiff(name.node2, name.node1))
@@ -431,11 +438,11 @@
         name <- paste(paste(nmdiff, collapse = ","),
                       paste(nmsect, collapse = ","),
                       sep = " ; ")
-
+        
         ## mark as ok
         todel <- FALSE
     }
-
+    
     ## return edge information
     list(tau = tau,
          nedSet = nedSet,
@@ -455,7 +462,7 @@
 
 ## bivariate copula selection
 fit.ACopula <- function(u1, u2, familyset = NA, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, weights = NA) {
-
+    
     ## select family and estimate parameter(s) for the pair copula
     out <- BiCopSelect(u1, u2,
                        familyset,
@@ -464,21 +471,21 @@
                        level,
                        weights = weights,
                        rotations = FALSE)
-
+    
     ## change rotation if family is not symmetric wrt the main diagonal
     if (out$family %in% c(23, 24, 26:30, 124, 224)) {
         out$family <- out$family + 10
     } else if (out$family %in% c(33, 34, 36:40, 134, 234)) {
         out$family <- out$family - 10
     }
-
+    
     ## tawn copulas also have to change type
     if (out$family%/%100 == 1) {
         out$family <- out$family + 100
     } else if (out$family%/%200 == 1) {
         out$family <- out$family - 100
     }
-
+    
     ## store pseudo-observations for estimation in next tree
     out$CondOn.1 <- .C("Hfunc1",
                        as.integer(out$family),
@@ -498,14 +505,14 @@
                        as.double(out$par2),
                        as.double(rep(0, length(u1))),
                        PACKAGE = "VineCopula")[[7]]
-
+    
     ## return results
     out
 }
 
 ## build R-Vine matrix object based on nested set of trees
 as.RVM2 <- function(RVine) {
-
+    
     ## initialize objects
     n <- length(RVine$Tree) + 1
     con <- list()
@@ -513,7 +520,7 @@
     nedSets <- list()
     crspParams <- list()
     crspTypes <- list()
-
+    
     ## get selected pairs, families and estimated parameters
     for (k in 1:(n - 2)) {
         nedSets[[k]]    <- RVine$Tree[[k]]$E$conditionedSet
@@ -527,24 +534,24 @@
     } else {
         nedSets[[n - 1]] <- list(RVine$Tree[[n - 1]]$E$conditionedSet)
     }
-
+    
     ## initialize matrices for RVineMatrix object
     Param <- array(dim = c(n, n))
     Params2 <- array(0, dim = c(n, n))
     Type <- array(dim = c(n, n))
     M <- matrix(NA, n, n)
-
+    
     ## store structure, families and parameters in matrices
     for (k in 1:(n - 1)) {
         w <- nedSets[[n - k]][[1]][1]
-
+        
         M[k, k] <- w
         M[(k + 1), k] <- nedSets[[n - k]][[1]][2]
-
+        
         Param[(k + 1), k]   <- crspParams[[n - k]][[1]][1]
         Params2[(k + 1), k] <- crspParams[[n - k]][[1]][2]
         Type[(k + 1), k]    <- crspTypes[[n - k]][[1]]
-
+        
         if (k == (n - 1)) {
             M[(k + 1), (k + 1)] <- nedSets[[n - k]][[1]][2]
         } else {
@@ -582,11 +589,11 @@
             }
         }
     }
-
+    
     ## clean NAs
     M[is.na(M)] <- 0
     Type[is.na(Type)] <- 0
-
+    
     ## return RVineMatrix object
     RVineMatrix(M, family = Type, par = Param, par2 = Params2, names = nam)
 }
@@ -604,7 +611,7 @@
     E.names <- apply(E, 1, function(x) paste(nms[x[1]],  nms[x[2]], sep = ","))
     # set weights
     w <- tau[upper.tri(tau)]
-
+    
     ## return results
     list(V = list(names = nms,
                   conditionedSet = NULL,
@@ -621,7 +628,7 @@
     E <- cbind(do.call(c, lapply(1:(d-1), function(i) rep(i, d-i))),
                do.call(c, lapply(1:(d-1), function(i) (i+1):d)))
     E <- matrix(E, ncol = 2)
-
+    
     ## output dummy list with edges set
     list(V = list(names = NULL,
                   conditionedSet = NULL,
@@ -638,23 +645,23 @@
     d <- length(g$V$names)
     v.all <- cbind(do.call(c, lapply(1:(d-1), function(i) seq.int(i))),
                    do.call(c, lapply(1:(d-1), function(i) rep(i+1, i))))
-
+    
     ## fnd weight
     vals <- apply(v.all, 1, set_weight, E = g$E)
-
+    
     ## create symmetric matrix of weights
     M <- matrix(0, d, d)
     M[upper.tri(M)] <- vals
     M <- M + t(M)
     diag(M) <- Inf
-
+    
     ## return final matrix
     M
 }
 
 set_weight <- function(x, E) {
     is.edge <- (x[1] == E$nums[, 1]) & (x[2] == E$nums[, 2])
-    if (!any(is.edge)) 1 else (1 - abs(E$weights[which(is.edge)]))
+    if (!any(is.edge)) Inf else (1 - abs(E$weights[which(is.edge)]))
 }
 
 



Mehr Informationen über die Mailingliste Vinecopula-commits