[Vinecopula-commits] r103 - pkg/R

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Do Jul 16 12:28:23 CEST 2015


Author: etobi
Date: 2015-07-16 12:28:23 +0200 (Thu, 16 Jul 2015)
New Revision: 103

Modified:
   pkg/R/RVineStructureSelect.r
Log:
RVineStructureSelect: Adjust to new version of igraph. Tree structure was not selected correctly. igraph function names changed to the names used in the new version. Some small modifications to avoid some for loops and make the code easier to read.

Modified: pkg/R/RVineStructureSelect.r
===================================================================
--- pkg/R/RVineStructureSelect.r	2015-07-01 19:28:15 UTC (rev 102)
+++ pkg/R/RVineStructureSelect.r	2015-07-16 10:28:23 UTC (rev 103)
@@ -46,9 +46,9 @@
     ## estimation in first tree ----------------------------
     # find optimal tree
     g <- initializeFirstGraph(data, weights)
-    mst <- findMaximumTauTree(g, mode = type) 
+    MST <- findMaximumTauTree(g, mode = type) 
     # estimate pair-copulas
-    VineTree <- fit.FirstTreeCopulas(mst, 
+    VineTree <- fit.FirstTreeCopulas(MST, 
                                      data, 
                                      familyset,
                                      selectioncrit,
@@ -67,9 +67,9 @@
             familyset <- 0
         # find optimal tree
         g <- buildNextGraph(VineTree, weights)
-        mst <- findMaximumTauTree(g, mode = type) 
+        MST <- findMaximumTauTree(g, mode = type) 
         # estimate pair-copulas
-        VineTree <- fit.TreeCopulas(mst,
+        VineTree <- fit.TreeCopulas(MST,
                                     VineTree, 
                                     familyset, 
                                     selectioncrit,
@@ -86,49 +86,53 @@
     as.RVM(RVine)
 }
 
+## full graph with Kendall's tau as edge weights
 initializeFirstGraph <- function(data.univ, weights) {
     
     # C = cor(data.univ,method='kendall')
     q <- dim(data.univ)[2]
-    C <- matrix(rep(1, q * q), ncol = q)
+#     C <- matrix(rep(1, q * q), ncol = q)
+#     
+#     for (i in 1:(q - 1)) {
+#         for (j in (i + 1):q) {
+#             tau <- fasttau(data.univ[, i], data.univ[, j], weights)
+#             C[i, j] <- tau
+#             C[j, i] <- tau
+#         }
+#     }
+#     rownames(C) <- colnames(C) <- colnames(data.univ)
+    C <- TauMatrix(data = data.univ, weights = weights)
     
-    for (i in 1:(q - 1)) {
-        for (j in (i + 1):q) {
-            tau <- fasttau(data.univ[, i], data.univ[, j], weights)
-            C[i, j] <- tau
-            C[j, i] <- tau
-        }
-    }
-    rownames(C) <- colnames(C) <- colnames(data.univ)
-    
-    g <- graph.adjacency(C, mode = "lower", weighted = TRUE, diag = FALSE)
+    g <- graph_from_adjacency_matrix(C, mode = "lower", weighted = TRUE, diag = FALSE)
     E(g)$tau <- E(g)$weight
-    E(g)$name <- paste(get.edgelist(g)[, 1], get.edgelist(g)[, 2], sep = ",")
+    E(g)$name <- paste(as_edgelist(g)[, 1], as_edgelist(g)[, 2], sep = ",")
     
-    for (i in 1:ecount(g)) {
-        E(g)$conditionedSet[[i]] <- get.edges(g, i)
+    for (i in 1:gsize(g)) {
+        E(g)$conditionedSet[[i]] <- ends(g, i, names = FALSE)
     }
     return(g)
 }
 
+## find maximum spanning tree/ first vine tree
 findMaximumTauTree <- function(g, mode = "RVine") {
     
     if (mode == "RVine") {
-        return(minimum.spanning.tree(g, weights = 1 - abs(E(g)$weight)))
+        return(mst(g, weights = 1 - abs(E(g)$weight)))
     } else if (mode == "CVine") {
-        M <- abs(get.adjacency(g, attr = "weight", sparse = 0))
+        M <- abs(as_adjacency_matrix(g, attr = "weight", sparse = 0))
         sumtaus <- rowSums(M)
         root <- which.max(sumtaus)
         
-        Ecken <- get.edges(g, 1:ecount(g))
+        Ecken <- ends(g, 1:gsize(g), names = FALSE)
         pos <- Ecken[, 2] == root | Ecken[, 1] == root
         
-        mst <- delete.edges(g, E(g)[!pos])
+        MST <- delete_edges(g, E(g)[!pos])
         
-        return(mst)
+        return(MST)
     }
 }
 
+# not required any longer? Use TauMatrix instead
 fasttau <- function(x, y, weights = NA) {
     if (any(is.na(weights))) {
         m <- length(x)
@@ -155,41 +159,41 @@
     return(ktau)
 }
 
-
-fit.FirstTreeCopulas <- function(mst, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
+## fit pair-copulas for the first vine tree
+fit.FirstTreeCopulas <- function(MST, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
     
-    d <- ecount(mst)
+    d <- gsize(MST)
     
     parameterForACopula <- list()
     
     for (i in 1:d) {
         parameterForACopula[[i]] <- list()
         
-        a <- get.edges(mst, i)
+        a <- ends(MST, i, names = FALSE)
         
         parameterForACopula[[i]]$zr1 <- data.univ[, a[1]]
         parameterForACopula[[i]]$zr2 <- data.univ[, a[2]]
         
-        E(mst)[i]$Copula.Data.1 <- list(data.univ[, a[1]])
-        E(mst)[i]$Copula.Data.2 <- list(data.univ[, a[2]])
+        E(MST)[i]$Copula.Data.1 <- list(data.univ[, a[1]])
+        E(MST)[i]$Copula.Data.2 <- list(data.univ[, a[2]])
         
-        if (is.null(V(mst)[a[1]]$name)) {
-            E(mst)[i]$Copula.CondName.1 <- a[1] 
+        if (is.null(V(MST)[a[1]]$name)) {
+            E(MST)[i]$Copula.CondName.1 <- a[1] 
         } else { 
-            E(mst)[i]$Copula.CondName.1 <- V(mst)[a[1]]$name
+            E(MST)[i]$Copula.CondName.1 <- V(MST)[a[1]]$name
         }
         
-        if (is.null(V(mst)[a[2]]$name)) {
-            E(mst)[i]$Copula.CondName.2 <- a[2] 
+        if (is.null(V(MST)[a[2]]$name)) {
+            E(MST)[i]$Copula.CondName.2 <- a[2] 
         } else {
-            E(mst)[i]$Copula.CondName.2 <- V(mst)[a[2]]$name
+            E(MST)[i]$Copula.CondName.2 <- V(MST)[a[2]]$name
         }
         
-        if (is.null(V(mst)[a[1]]$name) || is.null(V(mst)[a[2]]$name)) {
-            E(mst)[i]$Copula.Name <- paste(a[1], a[2], sep = " , ") 
+        if (is.null(V(MST)[a[1]]$name) || is.null(V(MST)[a[2]]$name)) {
+            E(MST)[i]$Copula.Name <- paste(a[1], a[2], sep = " , ") 
         } else {
-            E(mst)[i]$Copula.Name <- paste(V(mst)[a[1]]$name, 
-                                           V(mst)[a[2]]$name,
+            E(MST)[i]$Copula.Name <- paste(V(MST)[a[1]]$name, 
+                                           V(MST)[a[2]]$name,
                                            sep = " , ")
         }
     }
@@ -202,29 +206,30 @@
                             weights)
     
     for (i in 1:d) {
-        E(mst)$Copula.param[[i]] <- c(outForACopula[[i]]$par,
+        E(MST)$Copula.param[[i]] <- c(outForACopula[[i]]$par,
                                       outForACopula[[i]]$par2)
-        E(mst)[i]$Copula.type <- outForACopula[[i]]$family
-        E(mst)[i]$Copula.out <- list(outForACopula[[i]])
+        E(MST)[i]$Copula.type <- outForACopula[[i]]$family
+        E(MST)[i]$Copula.out <- list(outForACopula[[i]])
         
-        E(mst)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
-        E(mst)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
+        E(MST)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
+        E(MST)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
     }
     
-    return(mst)
+    return(MST)
 }
 
-fit.TreeCopulas <- function(mst, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
-    d <- ecount(mst)
+## fit pair-copulas for vine trees 2,...
+fit.TreeCopulas <- function(MST, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
+    d <- gsize(MST)
     
     parameterForACopula <- list()
     
     for (i in 1:d) {
         parameterForACopula[[i]] <- list()
         
-        con <- get.edge(mst, i)
+        con <- as.vector(ends(MST, i, names = FALSE))
         
-        temp <- get.edges(oldVineGraph, con)
+        temp <- ends(oldVineGraph, con, names = FALSE)
         
         if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
             same <- temp[2, 1]
@@ -234,8 +239,8 @@
             }
         }
         
-        other1 <- temp[1, temp[1, ] != same]
-        other2 <- temp[2, temp[2, ] != same]
+        # other1 <- temp[1, temp[1, ] != same]
+        # other2 <- temp[2, temp[2, ] != same]
         
         if (temp[1, 1] == same) {
             zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
@@ -266,16 +271,16 @@
         }
         
         if (progress == TRUE) 
-            message(n1a, " + ", n2a, " --> ", E(mst)[i]$name)
+            message(n1a, " + ", n2a, " --> ", E(MST)[i]$name)
         
         parameterForACopula[[i]]$zr1 <- zr1a
         parameterForACopula[[i]]$zr2 <- zr2a
         
-        E(mst)[i]$Copula.Data.1 <- list(zr1a)
-        E(mst)[i]$Copula.Data.2 <- list(zr2a)
+        E(MST)[i]$Copula.Data.1 <- list(zr1a)
+        E(MST)[i]$Copula.Data.2 <- list(zr2a)
         
-        E(mst)[i]$Copula.CondName.2 <- n1a
-        E(mst)[i]$Copula.CondName.1 <- n2a
+        E(MST)[i]$Copula.CondName.1 <- n1a
+        E(MST)[i]$Copula.CondName.2 <- n2a
     }
     
     outForACopula <- lapply(X = parameterForACopula,
@@ -286,24 +291,25 @@
                             weights)
     
     for (i in 1:d) {
-        E(mst)$Copula.param[[i]] <- c(outForACopula[[i]]$par, outForACopula[[i]]$par2)
-        E(mst)[i]$Copula.type <- outForACopula[[i]]$family
-        E(mst)[i]$Copula.out <- list(outForACopula[[i]])
+        E(MST)$Copula.param[[i]] <- c(outForACopula[[i]]$par, outForACopula[[i]]$par2)
+        E(MST)[i]$Copula.type <- outForACopula[[i]]$family
+        E(MST)[i]$Copula.out <- list(outForACopula[[i]])
         
-        E(mst)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.1)
-        E(mst)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.2)
+        E(MST)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
+        E(MST)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
     }
     
-    return(mst)
+    return(MST)
 }
 
+## initialize graph for next vine tree (possible edges)
 buildNextGraph <- function(oldVineGraph, weights = NA) {
     
-    EL <- get.edgelist(oldVineGraph)
-    d <- ecount(oldVineGraph)
+    # EL <- as_edgelist(oldVineGraph)
+    d <- gsize(oldVineGraph)
     
     
-    g <- graph.full(d)
+    g <- make_full_graph(d)
     V(g)$name <- E(oldVineGraph)$name
     V(g)$conditionedSet <- E(oldVineGraph)$conditionedSet
     
@@ -311,14 +317,14 @@
         V(g)$conditioningSet <- E(oldVineGraph)$conditioningSet
     }
     
-    for (i in 1:ecount(g)) {
+    for (i in 1:gsize(g)) {
         
-        con <- get.edge(g, i)
+        con <- as.vector(ends(g, i, names = FALSE))
         
-        temp <- get.edges(oldVineGraph, con)
+        temp <- ends(oldVineGraph, con, names = FALSE)
         
+        ## check for proximity condition
         ok <- FALSE
-        
         if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
             ok <- TRUE
             same <- temp[2, 1]
@@ -329,9 +335,10 @@
             }
         }
         
+        ## if proximity condition is fulfilled
         if (ok) {
-            other1 <- temp[1, temp[1, ] != same]
-            other2 <- temp[2, temp[2, ] != same]
+            # other1 <- temp[1, temp[1, ] != same]
+            # other2 <- temp[2, temp[2, ] != same]
             
             if (temp[1, 1] == same) {
                 zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
@@ -354,39 +361,40 @@
             }
             keine_nas <- !(is.na(zr1a) | is.na(zr2a))
             # print(keine_nas) print(zr1a) E(g)[i]$weight = cor(x=zr1[keine_nas],y=zr2[keine_nas], method='kendall')
-            E(g)[i]$weight <- fasttau(zr1a[keine_nas], zr2a[keine_nas], weights)
+            E(g)[i]$weight <- TauMatrix(cbind(zr1a[keine_nas], zr2a[keine_nas]), weights)[2,1]
             
-            name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,|] *")[[1]]
-            name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,|] *")[[1]]
+            name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,;] *")[[1]]
+            name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,;] *")[[1]]
             
-            schnitt <- c()
+            ## conditioning set
+            schnitt <- intersect(name.node1, name.node2)      
+#             for (j in 1:length(name.node1)) {
+#                 for (k in 1:length(name.node2)) {
+#                     if (name.node1[j] == name.node2[k]) {
+#                         schnitt <- c(schnitt, name.node1[j])
+#                         name.node1[j] <- ""
+#                         name.node2[k] <- ""
+#                         break
+#                     }
+#                 }
+#             }
             
-            for (j in 1:length(name.node1)) {
-                for (k in 1:length(name.node2)) {
-                    if (name.node1[j] == name.node2[k]) {
-                        schnitt <- c(schnitt, name.node1[j])
-                        name.node1[j] <- ""
-                        name.node2[k] <- ""
-                        break
-                    }
-                }
-            }
+            ## conditioned set
+            differenz <- c(setdiff(name.node1, name.node2), setdiff(name.node2, name.node1))
+#             for (j in 1:length(name.node1)) {
+#                 if (name.node1[j] != "") {
+#                     differenz <- c(differenz, name.node1[j])
+#                 }
+#             }
+#             for (j in 1:length(name.node2)) {
+#                 if (name.node2[j] != "") {
+#                     differenz <- c(differenz, name.node2[j])
+#                 }
+#             }
             
-            differenz <- c()
-            for (j in 1:length(name.node1)) {
-                if (name.node1[j] != "") {
-                    differenz <- c(differenz, name.node1[j])
-                }
-            }
-            for (j in 1:length(name.node2)) {
-                if (name.node2[j] != "") {
-                    differenz <- c(differenz, name.node2[j])
-                }
-            }
-            
             E(g)[i]$name <- paste(paste(differenz, collapse = ","),
                                   paste(schnitt, collapse = ","),
-                                  sep = " | ")
+                                  sep = " ; ")
             
             if (is.list(V(g)[con[1]]$conditionedSet)) {
                 l1 <- c(as.vector(V(g)[con[1]]$conditionedSet[[1]]),
@@ -414,7 +422,7 @@
     
     E(g)$tau <- E(g)$weight
     
-    g <- delete.edges(g, E(g)[E(g)$todel])
+    g <- delete_edges(g, E(g)[E(g)$todel])
     
     return(g)
 }
@@ -428,34 +436,35 @@
 
 intern_SchnittDifferenz <- function(liste1, liste2) {
     out <- list()
-    out$schnitt <- c()
-    out$differenz <- c()
+    out$schnitt <- intersect(liste1, liste2)
+    out$differenz <- c(setdiff(liste1, liste2), setdiff(liste2, liste1))
     
-    for (j in 1:length(liste1)) {
-        for (k in 1:length(liste2)) {
-            if (!is.na(liste2[k]) && liste1[j] == liste2[k]) {
-                out$schnitt <- c(out$schnitt, liste1[j])
-                liste1[j] <- NA
-                liste2[k] <- NA
-                break
-            }
-        }
-    }
+#     for (j in 1:length(liste1)) {
+#         for (k in 1:length(liste2)) {
+#             if (!is.na(liste2[k]) && liste1[j] == liste2[k]) {
+#                 out$schnitt <- c(out$schnitt, liste1[j])
+#                 liste1[j] <- NA
+#                 liste2[k] <- NA
+#                 break
+#             }
+#         }
+#     }
+#     
+#     for (j in 1:length(liste1)) {
+#         if (!is.na(liste1[j])) {
+#             out$differenz <- c(out$differenz, liste1[j])
+#         }
+#     }
+#     for (j in 1:length(liste2)) {
+#         if (!is.na(liste2[j])) {
+#             out$differenz <- c(out$differenz, liste2[j])
+#         }
+#     }
     
-    for (j in 1:length(liste1)) {
-        if (!is.na(liste1[j])) {
-            out$differenz <- c(out$differenz, liste1[j])
-        }
-    }
-    for (j in 1:length(liste2)) {
-        if (!is.na(liste2[j])) {
-            out$differenz <- c(out$differenz, liste2[j])
-        }
-    }
-    
     return(out)
 }
 
+## bivariate copula selection
 fit.ACopula <- function(u1, u2, familyset = NA, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, weights = NA) {
     
     ## select family and estimate parameter(s) for the pair copula
@@ -505,6 +514,7 @@
     out
 }
 
+## build R-Vine matrix object based on nested set of trees
 as.RVM <- function(RVine) {
     
     ## initialize objects



Mehr Informationen über die Mailingliste Vinecopula-commits