[Eventstudies-commits] r315 - pkg/R

Mon May 12 09:06:57 CEST 2014

Author: vikram
Date: 2014-05-12 09:06:57 +0200 (Mon, 12 May 2014)
New Revision: 315

Modified:
   pkg/R/ees.R
Log:
modified ees function to summary.ees function, it takes the output from get.clusters.formatted and provides the summary

Modified: pkg/R/ees.R
===================================================================

--- pkg/R/ees.R	2014-05-10 06:57:27 UTC (rev 314)
+++ pkg/R/ees.R	2014-05-12 07:06:57 UTC (rev 315)
@@ -6,10 +6,7 @@
 
 #----------------------------------------------------------------
 # INPUT:
-# 'input'     : Data series for which extreme events are 
-#               to be identified. More than one series 
-#               is permissble. The 'input' should be in time
-#               series format.
+# 'input'     : Output of get.clusters.formatted
 # 'prob.value': This is the tail value for which event is
 #               to be defined. For eg: prob.value=5 will
 #               consider 5% tail on both sides
@@ -34,37 +31,15 @@
 #     - Clustered, Un-clustered and Both
 #------------------------------------------------------------------
 # NOTE:
-ees <- function(input,prob.value){
+summary.ees <- function(input,prob.value){
   no.var <- NCOL(input)
 
-  #------------------------------------------------
-  # Breaking the function if any input is not given
-  #------------------------------------------------
-  # For one variable
-  # If class of data is not time series
-  class.input <- class(input)%in%c("xts","zoo")
-  if(class.input==FALSE){
-    stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo")
-  }
-  
-  # Converting an xts object to zoo series
-  input.class <- length(which(class(input)%in%"xts"))
-  if(length(input.class)==1){
-    input <- zoo(input)
-  }
-
   #-----------------------------------------
   # Event series: Clustered and un-clustered
   #-----------------------------------------
-  tmp <- get.clusters.formatted(event.series=input,
-                                response.series=input,
-                                probvalue=prob.value,
-                                event.value="nonreturns",
-                                response.value="nonreturns")
-  
-  tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),]
-  clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),]
-  unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),]
+  tail.events <- input[which(input$left.tail==1 | input$right.tail==1),]
+  clustered.tail.events <- input[which(input$cluster.pattern>1),]
+  unclustered.tail.events <- input[-which(input$cluster.pattern>1),]
   # Left tail data
   left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
   left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
@@ -78,10 +53,10 @@
   # Extreme event output
   #---------------------
   # Summary statistics
-  summ.st <- sumstat(input)
+  summ.st <- attr(input,"sumstat")
 
   # Distribtution of events
-  event.dist <- extreme.events.distribution(input,prob.value)
+  event.dist <- attr(input,"extreme.events.distribution")
 
   # Run length distribution
   runlength <- runlength.dist(input,prob.value)
@@ -221,8 +196,8 @@
 get.clusters.formatted <- function(event.series,
                                    response.series,
                                    probvalue=5,
-                                   event.value="returns",
-                                   response.value="returns"){
+                                   event.value="nonreturns",
+                                   response.value="nonreturns"){
    # Getting levels in event format
   tmp <- gen.data(event.series,
                   probvalue=probvalue,
@@ -329,6 +304,8 @@
                         "response.series","cluster.pattern")
 
   # Results
+  attr(tmp.ts, which = "sumstat") <- sumstat(input = event.series)
+  attr(tmp.ts, which = "extreme.events.distribution") <- extreme.events.distribution(input = event.series, gcf.output = tmp.ts, prob.value = probvalue)
   return(tmp.ts)
 }
 
@@ -370,70 +347,6 @@
   return(tmp)
 }
 
-######################
-# Yearly summary stats
-######################
-#----------------------------
-# INPUT:
-# 'input': Data series for which event cluster distribution
-#        is to be calculated;
-# 'prob.value': Probility value for which tail is to be constructed this
-#       value is equivalent to one side tail for eg. if prob.value=5
-#       then we have values of 5% tail on both sides
-# Functions used: yearly.exevent.summary()
-# OUTPUT:
-# Yearly distribution of extreme events
-#----------------------------
-yearly.exevent.dist <- function(input, prob.value){
-  no.var <- NCOL(input)
-  mylist <- list()
-  # Estimating cluster count
-  #--------------------
-  # Formatting clusters
-  #--------------------
-  tmp <- get.clusters.formatted(event.series=input,
-                                response.series=input,
-                                probvalue=prob.value,
-                                event.value="nonreturns",
-                                response.value="nonreturns")
-
-  tmp.res <- yearly.exevent.summary(tmp)
-  tmp.res[is.na(tmp.res)] <- 0
-  # Left and right tail
-  lower.tail.yearly.exevent <- tmp.res[,1:2]
-  upper.tail.yearly.exevent <- tmp.res[,3:4]
-  output <- list()
-  output$lower.tail <- lower.tail.yearly.exevent
-  output$upper.tail <- upper.tail.yearly.exevent
-  mylist <- output
-
-  return(mylist)
-}
-
-#------------------------------------------------
-# Get yearly no. and median for good and bad days
-#------------------------------------------------
-yearly.exevent.summary <- function(tmp){
-  tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
-  tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
-  # Bad days
-  tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
-  tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
-  index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
-  # Good days
-  tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
-  tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
-    index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
-  tmp.res <- merge(tmp.bad.y,tmp.good.y)
-  colnames(tmp.res) <- c("number.lowertail","median.lowertail",
-                         "number.uppertail","median.uppertail")
-  output <- as.data.frame(tmp.res)
-  cn <- rownames(output)
-  rownames(output) <- sapply(rownames(output),
-                             function(x)substr(x,nchar(x)-3,nchar(x)))
-  return(output)
-}
-
 #############################
 # Getting event segregation
 # - clustered and unclustered
@@ -455,7 +368,7 @@
 # Distribution of extreme events
 #----------------------------
 
-extreme.events.distribution <- function(input,prob.value){
+extreme.events.distribution <- function(input, gcf.output, prob.value){
   # Creating an empty frame
   no.var <- NCOL(input)
   lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
@@ -473,7 +386,7 @@
   # Cluster count
   #--------------
   # Non-returns (if it is already in return format)
-  tmp <- get.event.count(input,probvalue=prob.value,
+  tmp <- get.event.count(input, gcf.output, probvalue=prob.value,
                          value="nonreturns")
   lower.tail.dist  <- tmp[1,]
   upper.tail.dist  <- tmp[2,]
@@ -489,57 +402,104 @@
 # Functions used in event count calculation
 get.event.count <- function(series,
                             probvalue=5,
+                            gcf.output,
                             value="returns"){
   # Extracting dataset
   tmp.old <- gen.data(series,probvalue,value)
-  tmp <- get.clusters.formatted(event.series=series,
-                                response.series=series,
-                                probvalue,
-                                event.value=value,
-                                response.value=value)
-  
-  cp <- tmp[,"cluster.pattern"]
+  cp <- gcf.output[,"cluster.pattern"]
   lvl <- as.numeric(levels(as.factor(cp)))
   lvl.use <- lvl[which(lvl>1)]
   # Calculating Total events
   tot.ev.l <- length(which(tmp.old[,"left.tail"]==1))
   tot.ev.r <- length(which(tmp.old[,"right.tail"]==1))
   # Calculating Unclustered events
-  un.clstr.l <- length(which(tmp[,"left.tail"]==1 &
-                             tmp[,"cluster.pattern"]==1))
-  un.clstr.r <- length(which(tmp[,"right.tail"]==1 &
-                             tmp[,"cluster.pattern"]==1))
+  un.clstr.l <- length(which(gcf.output[,"left.tail"]==1 &
+                             gcf.output[,"cluster.pattern"]==1))
+  un.clstr.r <- length(which(gcf.output[,"right.tail"]==1 &
+                             gcf.output[,"cluster.pattern"]==1))
   # Calculating Used clusters
   us.cl.l <- us.cl.r <- NULL
   for(i in 1:length(lvl.use)){
-    tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
-                         tmp[,"left.tail"]==1))*lvl.use[i]
-    tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
-                         tmp[,"right.tail"]==1))*lvl.use[i]
+    tmp1 <- length(which(gcf.output[,"cluster.pattern"]==lvl.use[i] &
+                         gcf.output[,"left.tail"]==1))*lvl.use[i]
+    tmp2 <- length(which(gcf.output[,"cluster.pattern"]==lvl.use[i] &
+                         gcf.output[,"right.tail"]==1))*lvl.use[i]
     us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE)
     us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE)
   }
 
   # Making a table
   tb <- data.frame(matrix(NA,2,6))
-  colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","tot","tot.used")
+  colnames(tb) <- c("unclustered.events","used.clustered.events","removed.clustered.events","total.clustered.events","total.events","total.used.events")
   rownames(tb) <- c("lower","upper")
-  tb[,"tot"] <- c(tot.ev.l,tot.ev.r)
-  tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r)
-  tb[,"used.clstr"] <- c(us.cl.l,us.cl.r)
-  tb[,"tot.used"] <- tb$unclstr+tb$used.clstr
-  tb[,"tot.clstr"] <- tb$tot-tb$unclstr
-  tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr
+  tb[,"total.events"] <- c(tot.ev.l,tot.ev.r)
+  tb[,"unclustered.events"] <- c(un.clstr.l,un.clstr.r)
+  tb[,"used.clustered.events"] <- c(us.cl.l,us.cl.r)
+  tb[,"total.used.events"] <- tb$unclustered.events+tb$used.clustered.events
+  tb[,"total.clustered.events"] <- tb$total.events-tb$unclustered.events
+  tb[,"removed.clustered.events"] <- tb$total.clustered.events-tb$used.clustered.events
 
   return(tb)
 }
 
+######################
+# Yearly summary stats
+######################
+#----------------------------
+# INPUT:
+# 'input': Output from get.clusters.formatted function
+# 'prob.value': Probility value for which tail is to be constructed this
+#       value is equivalent to one side tail for eg. if prob.value=5
+#       then we have values of 5% tail on both sides
+# Functions used: yearly.exevent.summary()
+# OUTPUT:
+# Yearly distribution of extreme events
+#----------------------------
+yearly.exevent.dist <- function(input, prob.value){
+  mylist <- list()
+  ## Estimating cluster count  
+  tmp.res <- yearly.exevent.summary(input)
+  tmp.res[is.na(tmp.res)] <- 0
+  ## Left and right tail
+  lower.tail.yearly.exevent <- tmp.res[,1:2]
+  upper.tail.yearly.exevent <- tmp.res[,3:4]
+  output <- list()
+  output$lower.tail <- lower.tail.yearly.exevent
+  output$upper.tail <- upper.tail.yearly.exevent
+  mylist <- output
+  return(mylist)
+}
+
+#------------------------------------------------
+# Get yearly no. and median for good and bad days
+#------------------------------------------------
+yearly.exevent.summary <- function(tmp){
+  tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
+  tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
+  # Bad days
+  tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
+  tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
+  index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
+  # Good days
+  tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
+  tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
+    index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
+  tmp.res <- merge(tmp.bad.y,tmp.good.y)
+  colnames(tmp.res) <- c("number.lowertail","median.lowertail",
+                         "number.uppertail","median.uppertail")
+  output <- as.data.frame(tmp.res)
+  cn <- rownames(output)
+  rownames(output) <- sapply(rownames(output),
+                             function(x)substr(x,nchar(x)-3,nchar(x)))
+  return(output)
+}
+
 ####################################
 # Quantile values for extreme events
 ####################################
 #-----------------------------------
 # INPUT:
-# 'input': Data series in time series format
+# 'input': Output of get.clusters.formatted
 # Note: The input series expects the input to be in levels not in returns,
 #       if the some the inputs are already in return formats one has to
 #       use the other variable 'already.return.series'
@@ -551,9 +511,8 @@
 #-----------------------------------
 quantile.extreme.values <- function(input, prob.value){
   # Creating an empty frame
-  no.var <- NCOL(input)
-  lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
-  upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+  lower.tail.qnt.value <- data.frame(matrix(NA,nrow=1,ncol=6))
+  upper.tail.qnt.value <- data.frame(matrix(NA,nrow=1,ncol=6))
   colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
                                       "Mean")
   rownames(lower.tail.qnt.value) <- "extreme.events"
@@ -561,24 +520,15 @@
                                       "Mean")
   rownames(upper.tail.qnt.value) <- "extreme.events"
   # Estimating cluster count
-  #--------------------
-  # Formatting clusters
-  #--------------------
-  tmp <- get.clusters.formatted(event.series=input,
-                                response.series=input,
-                                probvalue=prob.value,
-                                event.value="nonreturns",
-                                response.value="nonreturns")
-
   # Left tail
-  tmp.left.tail <- tmp[which(tmp$left.tail==1),
+  tmp.left.tail <- input[which(input$left.tail==1),
                        "event.series"]
   df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1))))
   tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2)
   rownames(tmp.left) <- "extreme.events"
   colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean")
   # Right tail
-  tmp.right.tail <- tmp[which(tmp$right.tail==1),
+  tmp.right.tail <- input[which(input$right.tail==1),
                         "event.series"]
   df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1))))
   tmp.right <- round(cbind(df.right,
@@ -613,29 +563,20 @@
 #-----------------------------------
 runlength.dist <- function(input, prob.value){
 
-   # Creating an empty frame
-  no.var <- NCOL(input)
-  
   # Finding maximum Run length
   # Seed value
   max.runlength <- 0 
   #---------------------------
   # Estimating max. Run length
   #---------------------------
-  tmp <- get.clusters.formatted(event.series=input,
-                                response.series=input,
-                                probvalue=prob.value,
-                                event.value="nonreturns",
-                                response.value="nonreturns")
-
-  tmp.runlength <- get.cluster.distribution(tmp,"event.series")
+  tmp.runlength <- get.cluster.distribution(input,"event.series")
   max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)]))
   
   # Generating empty frame
   col.names <- seq(2:max.runlength)+1
-  lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+  lower.tail.runlength <- data.frame(matrix(NA,nrow=1,
                                             ncol=length(col.names)))
-  upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+  upper.tail.runlength <- data.frame(matrix(NA,nrow=1,
                                             ncol=length(col.names)))
   colnames(lower.tail.runlength) <- col.names
   rownames(lower.tail.runlength) <- "clustered.events"
@@ -645,7 +586,7 @@
   #----------------------
   # Run length estimation
   #----------------------
-  tmp.res <- get.cluster.distribution(tmp,"event.series")
+  tmp.res <- get.cluster.distribution(input,"event.series")
   for(j in 1:length(colnames(tmp.res))){
     col.number <- colnames(tmp.res)[j]
     lower.tail.runlength[1,col.number] <- tmp.res[1,col.number]