[Vinecopula-commits] r103 - pkg/R
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Do Jul 16 12:28:23 CEST 2015
Author: etobi
Date: 2015-07-16 12:28:23 +0200 (Thu, 16 Jul 2015)
New Revision: 103
Modified:
pkg/R/RVineStructureSelect.r
Log:
RVineStructureSelect: Adjust to new version of igraph. Tree structure was not selected correctly. igraph function names changed to the names used in the new version. Some small modifications to avoid some for loops and make the code easier to read.
Modified: pkg/R/RVineStructureSelect.r
===================================================================
--- pkg/R/RVineStructureSelect.r 2015-07-01 19:28:15 UTC (rev 102)
+++ pkg/R/RVineStructureSelect.r 2015-07-16 10:28:23 UTC (rev 103)
@@ -46,9 +46,9 @@
## estimation in first tree ----------------------------
# find optimal tree
g <- initializeFirstGraph(data, weights)
- mst <- findMaximumTauTree(g, mode = type)
+ MST <- findMaximumTauTree(g, mode = type)
# estimate pair-copulas
- VineTree <- fit.FirstTreeCopulas(mst,
+ VineTree <- fit.FirstTreeCopulas(MST,
data,
familyset,
selectioncrit,
@@ -67,9 +67,9 @@
familyset <- 0
# find optimal tree
g <- buildNextGraph(VineTree, weights)
- mst <- findMaximumTauTree(g, mode = type)
+ MST <- findMaximumTauTree(g, mode = type)
# estimate pair-copulas
- VineTree <- fit.TreeCopulas(mst,
+ VineTree <- fit.TreeCopulas(MST,
VineTree,
familyset,
selectioncrit,
@@ -86,49 +86,53 @@
as.RVM(RVine)
}
+## full graph with Kendall's tau as edge weights
initializeFirstGraph <- function(data.univ, weights) {
# C = cor(data.univ,method='kendall')
q <- dim(data.univ)[2]
- C <- matrix(rep(1, q * q), ncol = q)
+# C <- matrix(rep(1, q * q), ncol = q)
+#
+# for (i in 1:(q - 1)) {
+# for (j in (i + 1):q) {
+# tau <- fasttau(data.univ[, i], data.univ[, j], weights)
+# C[i, j] <- tau
+# C[j, i] <- tau
+# }
+# }
+# rownames(C) <- colnames(C) <- colnames(data.univ)
+ C <- TauMatrix(data = data.univ, weights = weights)
- for (i in 1:(q - 1)) {
- for (j in (i + 1):q) {
- tau <- fasttau(data.univ[, i], data.univ[, j], weights)
- C[i, j] <- tau
- C[j, i] <- tau
- }
- }
- rownames(C) <- colnames(C) <- colnames(data.univ)
-
- g <- graph.adjacency(C, mode = "lower", weighted = TRUE, diag = FALSE)
+ g <- graph_from_adjacency_matrix(C, mode = "lower", weighted = TRUE, diag = FALSE)
E(g)$tau <- E(g)$weight
- E(g)$name <- paste(get.edgelist(g)[, 1], get.edgelist(g)[, 2], sep = ",")
+ E(g)$name <- paste(as_edgelist(g)[, 1], as_edgelist(g)[, 2], sep = ",")
- for (i in 1:ecount(g)) {
- E(g)$conditionedSet[[i]] <- get.edges(g, i)
+ for (i in 1:gsize(g)) {
+ E(g)$conditionedSet[[i]] <- ends(g, i, names = FALSE)
}
return(g)
}
+## find maximum spanning tree/ first vine tree
findMaximumTauTree <- function(g, mode = "RVine") {
if (mode == "RVine") {
- return(minimum.spanning.tree(g, weights = 1 - abs(E(g)$weight)))
+ return(mst(g, weights = 1 - abs(E(g)$weight)))
} else if (mode == "CVine") {
- M <- abs(get.adjacency(g, attr = "weight", sparse = 0))
+ M <- abs(as_adjacency_matrix(g, attr = "weight", sparse = 0))
sumtaus <- rowSums(M)
root <- which.max(sumtaus)
- Ecken <- get.edges(g, 1:ecount(g))
+ Ecken <- ends(g, 1:gsize(g), names = FALSE)
pos <- Ecken[, 2] == root | Ecken[, 1] == root
- mst <- delete.edges(g, E(g)[!pos])
+ MST <- delete_edges(g, E(g)[!pos])
- return(mst)
+ return(MST)
}
}
+# not required any longer? Use TauMatrix instead
fasttau <- function(x, y, weights = NA) {
if (any(is.na(weights))) {
m <- length(x)
@@ -155,41 +159,41 @@
return(ktau)
}
-
-fit.FirstTreeCopulas <- function(mst, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
+## fit pair-copulas for the first vine tree
+fit.FirstTreeCopulas <- function(MST, data.univ, type, copulaSelectionBy, testForIndependence, testForIndependence.level, weights = NA) {
- d <- ecount(mst)
+ d <- gsize(MST)
parameterForACopula <- list()
for (i in 1:d) {
parameterForACopula[[i]] <- list()
- a <- get.edges(mst, i)
+ a <- ends(MST, i, names = FALSE)
parameterForACopula[[i]]$zr1 <- data.univ[, a[1]]
parameterForACopula[[i]]$zr2 <- data.univ[, a[2]]
- E(mst)[i]$Copula.Data.1 <- list(data.univ[, a[1]])
- E(mst)[i]$Copula.Data.2 <- list(data.univ[, a[2]])
+ E(MST)[i]$Copula.Data.1 <- list(data.univ[, a[1]])
+ E(MST)[i]$Copula.Data.2 <- list(data.univ[, a[2]])
- if (is.null(V(mst)[a[1]]$name)) {
- E(mst)[i]$Copula.CondName.1 <- a[1]
+ if (is.null(V(MST)[a[1]]$name)) {
+ E(MST)[i]$Copula.CondName.1 <- a[1]
} else {
- E(mst)[i]$Copula.CondName.1 <- V(mst)[a[1]]$name
+ E(MST)[i]$Copula.CondName.1 <- V(MST)[a[1]]$name
}
- if (is.null(V(mst)[a[2]]$name)) {
- E(mst)[i]$Copula.CondName.2 <- a[2]
+ if (is.null(V(MST)[a[2]]$name)) {
+ E(MST)[i]$Copula.CondName.2 <- a[2]
} else {
- E(mst)[i]$Copula.CondName.2 <- V(mst)[a[2]]$name
+ E(MST)[i]$Copula.CondName.2 <- V(MST)[a[2]]$name
}
- if (is.null(V(mst)[a[1]]$name) || is.null(V(mst)[a[2]]$name)) {
- E(mst)[i]$Copula.Name <- paste(a[1], a[2], sep = " , ")
+ if (is.null(V(MST)[a[1]]$name) || is.null(V(MST)[a[2]]$name)) {
+ E(MST)[i]$Copula.Name <- paste(a[1], a[2], sep = " , ")
} else {
- E(mst)[i]$Copula.Name <- paste(V(mst)[a[1]]$name,
- V(mst)[a[2]]$name,
+ E(MST)[i]$Copula.Name <- paste(V(MST)[a[1]]$name,
+ V(MST)[a[2]]$name,
sep = " , ")
}
}
@@ -202,29 +206,30 @@
weights)
for (i in 1:d) {
- E(mst)$Copula.param[[i]] <- c(outForACopula[[i]]$par,
+ E(MST)$Copula.param[[i]] <- c(outForACopula[[i]]$par,
outForACopula[[i]]$par2)
- E(mst)[i]$Copula.type <- outForACopula[[i]]$family
- E(mst)[i]$Copula.out <- list(outForACopula[[i]])
+ E(MST)[i]$Copula.type <- outForACopula[[i]]$family
+ E(MST)[i]$Copula.out <- list(outForACopula[[i]])
- E(mst)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
- E(mst)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
+ E(MST)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
+ E(MST)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
}
- return(mst)
+ return(MST)
}
-fit.TreeCopulas <- function(mst, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
- d <- ecount(mst)
+## fit pair-copulas for vine trees 2,...
+fit.TreeCopulas <- function(MST, oldVineGraph, type, copulaSelectionBy, testForIndependence, testForIndependence.level, progress, weights = NA) {
+ d <- gsize(MST)
parameterForACopula <- list()
for (i in 1:d) {
parameterForACopula[[i]] <- list()
- con <- get.edge(mst, i)
+ con <- as.vector(ends(MST, i, names = FALSE))
- temp <- get.edges(oldVineGraph, con)
+ temp <- ends(oldVineGraph, con, names = FALSE)
if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
same <- temp[2, 1]
@@ -234,8 +239,8 @@
}
}
- other1 <- temp[1, temp[1, ] != same]
- other2 <- temp[2, temp[2, ] != same]
+ # other1 <- temp[1, temp[1, ] != same]
+ # other2 <- temp[2, temp[2, ] != same]
if (temp[1, 1] == same) {
zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
@@ -266,16 +271,16 @@
}
if (progress == TRUE)
- message(n1a, " + ", n2a, " --> ", E(mst)[i]$name)
+ message(n1a, " + ", n2a, " --> ", E(MST)[i]$name)
parameterForACopula[[i]]$zr1 <- zr1a
parameterForACopula[[i]]$zr2 <- zr2a
- E(mst)[i]$Copula.Data.1 <- list(zr1a)
- E(mst)[i]$Copula.Data.2 <- list(zr2a)
+ E(MST)[i]$Copula.Data.1 <- list(zr1a)
+ E(MST)[i]$Copula.Data.2 <- list(zr2a)
- E(mst)[i]$Copula.CondName.2 <- n1a
- E(mst)[i]$Copula.CondName.1 <- n2a
+ E(MST)[i]$Copula.CondName.1 <- n1a
+ E(MST)[i]$Copula.CondName.2 <- n2a
}
outForACopula <- lapply(X = parameterForACopula,
@@ -286,24 +291,25 @@
weights)
for (i in 1:d) {
- E(mst)$Copula.param[[i]] <- c(outForACopula[[i]]$par, outForACopula[[i]]$par2)
- E(mst)[i]$Copula.type <- outForACopula[[i]]$family
- E(mst)[i]$Copula.out <- list(outForACopula[[i]])
+ E(MST)$Copula.param[[i]] <- c(outForACopula[[i]]$par, outForACopula[[i]]$par2)
+ E(MST)[i]$Copula.type <- outForACopula[[i]]$family
+ E(MST)[i]$Copula.out <- list(outForACopula[[i]])
- E(mst)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.1)
- E(mst)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.2)
+ E(MST)[i]$Copula.CondData.1 <- list(outForACopula[[i]]$CondOn.1)
+ E(MST)[i]$Copula.CondData.2 <- list(outForACopula[[i]]$CondOn.2)
}
- return(mst)
+ return(MST)
}
+## initialize graph for next vine tree (possible edges)
buildNextGraph <- function(oldVineGraph, weights = NA) {
- EL <- get.edgelist(oldVineGraph)
- d <- ecount(oldVineGraph)
+ # EL <- as_edgelist(oldVineGraph)
+ d <- gsize(oldVineGraph)
- g <- graph.full(d)
+ g <- make_full_graph(d)
V(g)$name <- E(oldVineGraph)$name
V(g)$conditionedSet <- E(oldVineGraph)$conditionedSet
@@ -311,14 +317,14 @@
V(g)$conditioningSet <- E(oldVineGraph)$conditioningSet
}
- for (i in 1:ecount(g)) {
+ for (i in 1:gsize(g)) {
- con <- get.edge(g, i)
+ con <- as.vector(ends(g, i, names = FALSE))
- temp <- get.edges(oldVineGraph, con)
+ temp <- ends(oldVineGraph, con, names = FALSE)
+ ## check for proximity condition
ok <- FALSE
-
if ((temp[1, 1] == temp[2, 1]) || (temp[1, 2] == temp[2, 1])) {
ok <- TRUE
same <- temp[2, 1]
@@ -329,9 +335,10 @@
}
}
+ ## if proximity condition is fulfilled
if (ok) {
- other1 <- temp[1, temp[1, ] != same]
- other2 <- temp[2, temp[2, ] != same]
+ # other1 <- temp[1, temp[1, ] != same]
+ # other2 <- temp[2, temp[2, ] != same]
if (temp[1, 1] == same) {
zr1 <- E(oldVineGraph)[con[1]]$Copula.CondData.2
@@ -354,39 +361,40 @@
}
keine_nas <- !(is.na(zr1a) | is.na(zr2a))
# print(keine_nas) print(zr1a) E(g)[i]$weight = cor(x=zr1[keine_nas],y=zr2[keine_nas], method='kendall')
- E(g)[i]$weight <- fasttau(zr1a[keine_nas], zr2a[keine_nas], weights)
+ E(g)[i]$weight <- TauMatrix(cbind(zr1a[keine_nas], zr2a[keine_nas]), weights)[2,1]
- name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,|] *")[[1]]
- name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,|] *")[[1]]
+ name.node1 <- strsplit(V(g)[con[1]]$name, split = " *[,;] *")[[1]]
+ name.node2 <- strsplit(V(g)[con[2]]$name, split = " *[,;] *")[[1]]
- schnitt <- c()
+ ## conditioning set
+ schnitt <- intersect(name.node1, name.node2)
+# for (j in 1:length(name.node1)) {
+# for (k in 1:length(name.node2)) {
+# if (name.node1[j] == name.node2[k]) {
+# schnitt <- c(schnitt, name.node1[j])
+# name.node1[j] <- ""
+# name.node2[k] <- ""
+# break
+# }
+# }
+# }
- for (j in 1:length(name.node1)) {
- for (k in 1:length(name.node2)) {
- if (name.node1[j] == name.node2[k]) {
- schnitt <- c(schnitt, name.node1[j])
- name.node1[j] <- ""
- name.node2[k] <- ""
- break
- }
- }
- }
+ ## conditioned set
+ differenz <- c(setdiff(name.node1, name.node2), setdiff(name.node2, name.node1))
+# for (j in 1:length(name.node1)) {
+# if (name.node1[j] != "") {
+# differenz <- c(differenz, name.node1[j])
+# }
+# }
+# for (j in 1:length(name.node2)) {
+# if (name.node2[j] != "") {
+# differenz <- c(differenz, name.node2[j])
+# }
+# }
- differenz <- c()
- for (j in 1:length(name.node1)) {
- if (name.node1[j] != "") {
- differenz <- c(differenz, name.node1[j])
- }
- }
- for (j in 1:length(name.node2)) {
- if (name.node2[j] != "") {
- differenz <- c(differenz, name.node2[j])
- }
- }
-
E(g)[i]$name <- paste(paste(differenz, collapse = ","),
paste(schnitt, collapse = ","),
- sep = " | ")
+ sep = " ; ")
if (is.list(V(g)[con[1]]$conditionedSet)) {
l1 <- c(as.vector(V(g)[con[1]]$conditionedSet[[1]]),
@@ -414,7 +422,7 @@
E(g)$tau <- E(g)$weight
- g <- delete.edges(g, E(g)[E(g)$todel])
+ g <- delete_edges(g, E(g)[E(g)$todel])
return(g)
}
@@ -428,34 +436,35 @@
intern_SchnittDifferenz <- function(liste1, liste2) {
out <- list()
- out$schnitt <- c()
- out$differenz <- c()
+ out$schnitt <- intersect(liste1, liste2)
+ out$differenz <- c(setdiff(liste1, liste2), setdiff(liste2, liste1))
- for (j in 1:length(liste1)) {
- for (k in 1:length(liste2)) {
- if (!is.na(liste2[k]) && liste1[j] == liste2[k]) {
- out$schnitt <- c(out$schnitt, liste1[j])
- liste1[j] <- NA
- liste2[k] <- NA
- break
- }
- }
- }
+# for (j in 1:length(liste1)) {
+# for (k in 1:length(liste2)) {
+# if (!is.na(liste2[k]) && liste1[j] == liste2[k]) {
+# out$schnitt <- c(out$schnitt, liste1[j])
+# liste1[j] <- NA
+# liste2[k] <- NA
+# break
+# }
+# }
+# }
+#
+# for (j in 1:length(liste1)) {
+# if (!is.na(liste1[j])) {
+# out$differenz <- c(out$differenz, liste1[j])
+# }
+# }
+# for (j in 1:length(liste2)) {
+# if (!is.na(liste2[j])) {
+# out$differenz <- c(out$differenz, liste2[j])
+# }
+# }
- for (j in 1:length(liste1)) {
- if (!is.na(liste1[j])) {
- out$differenz <- c(out$differenz, liste1[j])
- }
- }
- for (j in 1:length(liste2)) {
- if (!is.na(liste2[j])) {
- out$differenz <- c(out$differenz, liste2[j])
- }
- }
-
return(out)
}
+## bivariate copula selection
fit.ACopula <- function(u1, u2, familyset = NA, selectioncrit = "AIC", indeptest = FALSE, level = 0.05, weights = NA) {
## select family and estimate parameter(s) for the pair copula
@@ -505,6 +514,7 @@
out
}
+## build R-Vine matrix object based on nested set of trees
as.RVM <- function(RVine) {
## initialize objects
Mehr Informationen über die Mailingliste Vinecopula-commits