[Vinecopula-commits] r117 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Di Aug 11 11:12:00 CEST 2015


Author: tnagler
Date: 2015-08-11 11:11:59 +0200 (Tue, 11 Aug 2015)
New Revision: 117

Modified:
   pkg/R/RVineStructureSelect.r
Log:
* initializeFirstGraph, buildNextGraph: tidy code and make fns run faster using apply-family instead of loops

Modified: pkg/R/RVineStructureSelect.r
===================================================================
--- pkg/R/RVineStructureSelect.r	2015-08-11 08:23:12 UTC (rev 116)
+++ pkg/R/RVineStructureSelect.r	2015-08-11 09:11:59 UTC (rev 117)
@@ -1,7 +1,7 @@
 RVineStructureSelect <- function(data, familyset = NA, type = 0, selectioncrit = "AIC", indeptest = FALSE, 
                                  level = 0.05, trunclevel = NA, progress = FALSE,  weights = NA, rotations = TRUE) { 
-    d <- n <- dim(data)[2]
-    N <- dim(data)[1]
+    d <- ncol(data)
+    N <- nrow(data)
     
     ## sanity checks 
     if (type == 0) 
@@ -30,7 +30,7 @@
     
     ## set variable names and trunclevel if not provided 
     if (is.null(colnames(data))) 
-        colnames(data) <- paste("V", 1:n, sep = "")
+        colnames(data) <- paste("V", 1:d, sep = "")
     if (is.na(trunclevel)) 
         trunclevel <- d
     
@@ -61,7 +61,7 @@
     oldVineGraph <- VineTree
     
     ## estimation in higher trees --------------------------
-    for (i in 2:(n - 1)) {
+    for (i in 2:(d - 1)) {
         # only estimate pair-copulas if not truncated
         if (trunclevel == i - 1) 
             familyset <- 0
@@ -88,31 +88,25 @@
     as.RVM(.RVine)
 }
 
-## full graph with Kendall's tau as edge weights
 initializeFirstGraph <- function(data.univ, weights) {
     
-    # C = cor(data.univ,method='kendall')
-    q <- dim(data.univ)[2]
-#     C <- matrix(rep(1, q * q), ncol = q)
-#     
-#     for (i in 1:(q - 1)) {
-#         for (j in (i + 1):q) {
-#             tau <- fasttau(data.univ[, i], data.univ[, j], weights)
-#             C[i, j] <- tau
-#             C[j, i] <- tau
-#         }
-#     }
-#     rownames(C) <- colnames(C) <- colnames(data.univ)
-    C <- TauMatrix(data = data.univ, weights = weights)
+    ## calculate Kendall's tau
+    taus <- TauMatrix(data = data.univ, weights = weights)
     
-    g <- graph_from_adjacency_matrix(C, mode = "lower", weighted = TRUE, diag = FALSE)
+    ## create graph with Kendall's tau as weights
+    g <- graph_from_adjacency_matrix(taus,
+                                     mode = "lower",
+                                     weighted = TRUE,
+                                     diag = FALSE)
     E(g)$tau <- E(g)$weight
     E(g)$name <- paste(as_edgelist(g)[, 1], as_edgelist(g)[, 2], sep = ",")
     
-    for (i in 1:gsize(g)) {
-        E(g)$conditionedSet[[i]] <- ends(g, i, names = FALSE)
-    }
-    return(g)
+    ## store condition sets
+    E(g)$conditionedSet <- unname(split(ends(g, 1:gsize(g), names = FALSE),
+                                        1:gsize(g)))
+    
+    ## return graph object
+    g
 }
 
 ## find maximum spanning tree/ first vine tree
@@ -307,128 +301,128 @@
 ## initialize graph for next vine tree (possible edges)
 buildNextGraph <- function(oldVineGraph, weights = NA) {
     
-    # EL <- as_edgelist(oldVineGraph)
     d <- gsize(oldVineGraph)
     
-    
+    ## initialize with full graph
     g <- make_full_graph(d)
     V(g)$name <- E(oldVineGraph)$name
     V(g)$conditionedSet <- E(oldVineGraph)$conditionedSet
-    
     if (!is.null(E(oldVineGraph)$conditioningSet)) {
         V(g)$conditioningSet <- E(oldVineGraph)$conditioningSet
     }
     
-    for (i in 1:gsize(g)) {
+    ## get info for all edges
+    out <- lapply(1:gsize(g),
+                  getEdgeInfo, 
+                  g = g, 
+                  oldVineGraph = oldVineGraph,
+                  weights = weights)
+    
+    ## annotate graph (same order as in old version of this function)
+    E(g)$weight          <- sapply(out, function(x) x$tau)
+    E(g)$name            <- sapply(out, function(x) x$name)
+    E(g)$conditionedSet  <- lapply(out, function(x) x$nedSet)
+    E(g)$conditioningSet <- lapply(out, function(x) x$ningSet)
+    E(g)$todel           <- sapply(out, function(x) x$todel)
+    E(g)$tau <- E(g)$weight
+    
+    ## delete edges that are prohibited by the proximity condition
+    g <- delete_edges(g, E(g)[E(g)$todel])
+    
+    ## return new graph
+    g
+}
+
+## function for obtaining edge information
+getEdgeInfo <- function(i, g, oldVineGraph, weights) {
+    
+    ## get edge
+    con <- as.vector(ends(g, i, names = FALSE))
+    temp <- ends(oldVineGraph, con, names = FALSE)
+    
+    ## check for proximity condition
+    ok <- FALSE
+    if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
+        ok <- TRUE
+        same <- temp[2, 1]
+    } else {
+        if ((temp[1, 1] == temp[2, 2]) || (temp[1, 2] == temp[2, 2])) {
+            ok <- TRUE
+            same <- temp[2, 2]
+        }
+    }
+    
+    ## dummy output
+    tau <- nedSet <- ningSet <- name <- NA
+    todel <- TRUE
+    
+    ## calculate edge info if proximity condition is fulfilled
+    if (ok) {
+        # get data
+        if (temp[1, 1] == same) {
+            zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
+        } else {
+            zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.1
+        }
+        if (temp[2, 1] == same) {
+            zr2 <- E(oldVineGraph)[con[2]]$Copula.CondData.2
+        } else {
+            zr2 <- E(oldVineGraph)[con[2]]$Copula.CondData.1
+        }
+        if (is.list(zr1)) {
+            zr1a <- as.vector(zr1[[1]])
+            zr2a <- as.vector(zr2[[1]])
+        } else {
+            zr1a <- zr1
+            zr2a <- zr2
+        }
         
-        con <- as.vector(ends(g, i, names = FALSE))
+        # calculate Kendall's tau
+        keine_nas <- !(is.na(zr1a) | is.na(zr2a))
+        tau <- fasttau(zr1a[keine_nas],
+                       zr2a[keine_nas],
+                       weights)
         
-        temp <- ends(oldVineGraph, con, names = FALSE)
+        # get names
+        name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,;] *")[[1]]
+        name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,;] *")[[1]]
         
-        ## check for proximity condition
-        ok <- FALSE
-        if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
-            ok <- TRUE
-            same <- temp[2, 1]
+        # infer conditioned set and conditioning set
+        if (is.list(V(g)[con[1]]$conditionedSet)) {
+            l1 <- c(as.vector(V(g)[con[1]]$conditionedSet[[1]]),
+                    as.vector(V(g)[con[1]]$conditioningSet[[1]]))
+            l2 <- c(as.vector(V(g)[con[2]]$conditionedSet[[1]]),
+                    as.vector(V(g)[con[2]]$conditioningSet[[1]]))
         } else {
-            if ((temp[1, 1] == temp[2, 2]) || (temp[1, 2] == temp[2, 2])) {
-                ok <- TRUE
-                same <- temp[2, 2]
-            }
+            l1 <- c(V(g)[con[1]]$conditionedSet,
+                    V(g)[con[1]]$conditioningSet)
+            l2 <- c(V(g)[con[2]]$conditionedSet,
+                    V(g)[con[2]]$conditioningSet)
         }
+        nedSet <- c(setdiff(l1, l2), setdiff(l2, l1))
+        ningSet <- intersect(l1, l2)
+
+        # set edge name
+        nmdiff <- c(setdiff(name.node1, name.node2),
+                  setdiff(name.node2, name.node1))
+        nmsect <- intersect(name.node1, name.node2)  
+        name <- paste(paste(nmdiff, collapse = ","),
+                      paste(nmsect, collapse = ","),
+                      sep = " ; ")
         
-        ## if proximity condition is fulfilled
-        if (ok) {
-            # other1 <- temp[1, temp[1, ] != same]
-            # other2 <- temp[2, temp[2, ] != same]
-            
-            if (temp[1, 1] == same) {
-                zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
-            } else {
-                zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.1
-            }
-            
-            if (temp[2, 1] == same) {
-                zr2 <- E(oldVineGraph)[con[2]]$Copula.CondData.2
-            } else {
-                zr2 <- E(oldVineGraph)[con[2]]$Copula.CondData.1
-            }
-            # print(is.list(zr1))
-            if (is.list(zr1)) {
-                zr1a <- as.vector(zr1[[1]])
-                zr2a <- as.vector(zr2[[1]])
-            } else {
-                zr1a <- zr1
-                zr2a <- zr2
-            }
-            keine_nas <- !(is.na(zr1a) | is.na(zr2a))
-            # print(keine_nas) print(zr1a) E(g)[i]$weight = cor(x=zr1[keine_nas],y=zr2[keine_nas], method='kendall')
-            E(g)[i]$weight <- TauMatrix(cbind(zr1a[keine_nas], zr2a[keine_nas]), weights)[2,1]
-            
-            name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,;] *")[[1]]
-            name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,;] *")[[1]]
-            
-            ## conditioning set
-            schnitt <- intersect(name.node1, name.node2)      
-#             for (j in 1:length(name.node1)) {
-#                 for (k in 1:length(name.node2)) {
-#                     if (name.node1[j] == name.node2[k]) {
-#                         schnitt <- c(schnitt, name.node1[j])
-#                         name.node1[j] <- ""
-#                         name.node2[k] <- ""
-#                         break
-#                     }
-#                 }
-#             }
-            
-            ## conditioned set
-            differenz <- c(setdiff(name.node1, name.node2), setdiff(name.node2, name.node1))
-#             for (j in 1:length(name.node1)) {
-#                 if (name.node1[j] != "") {
-#                     differenz <- c(differenz, name.node1[j])
-#                 }
-#             }
-#             for (j in 1:length(name.node2)) {
-#                 if (name.node2[j] != "") {
-#                     differenz <- c(differenz, name.node2[j])
-#                 }
-#             }
-            
-            E(g)[i]$name <- paste(paste(differenz, collapse = ","),
-                                  paste(schnitt, collapse = ","),
-                                  sep = " ; ")
-            
-            if (is.list(V(g)[con[1]]$conditionedSet)) {
-                l1 <- c(as.vector(V(g)[con[1]]$conditionedSet[[1]]),
-                        as.vector(V(g)[con[1]]$conditioningSet[[1]]))
-                l2 <- c(as.vector(V(g)[con[2]]$conditionedSet[[1]]),
-                        as.vector(V(g)[con[2]]$conditioningSet[[1]]))
-            } else {
-                l1 <- c(V(g)[con[1]]$conditionedSet,
-                        V(g)[con[1]]$conditioningSet)
-                l2 <- c(V(g)[con[2]]$conditionedSet,
-                        V(g)[con[2]]$conditioningSet)
-            }
-            out <- intern_SchnittDifferenz(l1, l2)
-            
-            suppressWarnings({
-                E(g)$conditionedSet[i] <- list(out$differenz)
-            })
-            suppressWarnings({
-                E(g)$conditioningSet[i] <- list(out$schnitt)
-            })
-        }
-        
-        E(g)[i]$todel <- !ok
+        # mark as ok
+        todel <- FALSE
     }
     
-    E(g)$tau <- E(g)$weight
-    
-    g <- delete_edges(g, E(g)[E(g)$todel])
-    
-    return(g)
+    ## return edge information
+    list(tau = tau,
+         nedSet = nedSet,
+         ningSet = ningSet,
+         name = name,
+         todel = todel)
 }
 
+
 wrapper_fit.ACopula <- function(parameterForACopula, type, ...) {
     return(fit.ACopula(parameterForACopula$zr1, 
                        parameterForACopula$zr2,
@@ -436,35 +430,6 @@
                        ...))
 }
 
-intern_SchnittDifferenz <- function(liste1, liste2) {
-    out <- list()
-    out$schnitt <- intersect(liste1, liste2)
-    out$differenz <- c(setdiff(liste1, liste2), setdiff(liste2, liste1))
-    
-#     for (j in 1:length(liste1)) {
-#         for (k in 1:length(liste2)) {
-#             if (!is.na(liste2[k]) && liste1[j] == liste2[k]) {
-#                 out$schnitt <- c(out$schnitt, liste1[j])
-#                 liste1[j] <- NA
-#                 liste2[k] <- NA
-#                 break
-#             }
-#         }
-#     }
-#     
-#     for (j in 1:length(liste1)) {
-#         if (!is.na(liste1[j])) {
-#             out$differenz <- c(out$differenz, liste1[j])
-#         }
-#     }
-#     for (j in 1:length(liste2)) {
-#         if (!is.na(liste2[j])) {
-#             out$differenz <- c(out$differenz, liste2[j])
-#         }
-#     }
-    
-    return(out)
-}
 
 ## bivariate copula selection
 fit.ACopula <- function(u1, u2, familyset = NA, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, weights = NA) {



Mehr Informationen über die Mailingliste Vinecopula-commits