[Eventstudies-commits] r60 - in pkg: . R data man vignettes

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Apr 29 22:46:01 CEST 2013


Author: vikram
Date: 2013-04-29 22:46:01 +0200 (Mon, 29 Apr 2013)
New Revision: 60

Added:
   pkg/R/ees.R
   pkg/data/eesData.rda
   pkg/man/ees.Rd
   pkg/man/eesData.Rd
   pkg/vignettes/ees.Rnw
Removed:
   pkg/R/identifyextremeevents.R
   pkg/data/identifyexeventData.rda
   pkg/man/identifyexeventData.Rd
   pkg/man/identifyextremeevents.Rd
   pkg/vignettes/identifyextremeevent.Rnw
Modified:
   pkg/NAMESPACE
Log:
Added new eesPlot functionality

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2013-04-29 14:23:37 UTC (rev 59)
+++ pkg/NAMESPACE	2013-04-29 20:46:01 UTC (rev 60)
@@ -1,3 +1,3 @@
-export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identifyextremeevents)
+export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, ees, eesPlot, deprintize)
 
 

Copied: pkg/R/ees.R (from rev 57, pkg/R/identifyextremeevents.R)
===================================================================
--- pkg/R/ees.R	                        (rev 0)
+++ pkg/R/ees.R	2013-04-29 20:46:01 UTC (rev 60)
@@ -0,0 +1,883 @@
+
+# Total 16 functions
+############################
+# Identifying extreme events
+############################
+# libraries required
+library(zoo)
+#----------------------------------------------------------------
+# INPUT:
+# 'input'     : Data series for which extreme events are 
+#               to be identified. More than one series 
+#               is permissble. The 'input' should be in time
+#               series format.
+# 'prob.value': This is the tail value for which event is
+#               to be defined. For eg: prob.value=5 will
+#               consider 5% tail on both sides
+#-----------------------------------------------------------------
+# OUTPUT:
+# Result will be in a list of 3 with following tables:
+# 1. Summary statistics
+#    a. Summary of whole data-set
+# 2. Lower tail: Extreme event tables
+#    a. Distribution of extreme events
+#    b. Run length distribution
+#    c. Quantile values
+#    d. Yearly distribution
+#    e. Extreme event data
+#     - Clustered, Un-clustered and Both
+# 3. Upper tail: Extreme event tables
+#    a. Distribution of extreme events
+#    b. Run length distribution
+#    c. Quantile values
+#    d. Yearly distribution
+#    e. Extreme event data
+#     - Clustered, Un-clustered and Both
+#------------------------------------------------------------------
+# NOTE:
+ees <- function(input,prob.value){
+  no.var <- NCOL(input)
+
+  #------------------------------------------------
+  # Breaking the function if any input is not given
+  #------------------------------------------------
+  # For one variable
+  # If class of data is not time series
+  class.input <- class(input)%in%c("xts","zoo")
+  if(class.input==FALSE){
+    stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo")
+  }
+  
+  # Converting an xts object to zoo series
+  input.class <- length(which(class(input)%in%"xts"))
+  if(length(input.class)==1){
+    input <- zoo(input)
+  }
+
+  #-----------------------------------------
+  # Event series: Clustered and un-clustered
+  #-----------------------------------------
+  tmp <- get.clusters.formatted(event.series=input,
+                                response.series=input,
+                                probvalue=prob.value,
+                                event.value="nonreturns",
+                                response.value="nonreturns")
+  tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),]
+  clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),]
+  unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),]
+  # Left tail data
+  left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+  left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+  left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")]
+  # Right tail data
+  right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+  right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+  right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")]
+  
+  #---------------------
+  # Extreme event output
+  #---------------------
+  # Summary statistics
+  summ.st <- sumstat(input)
+
+  # Distribtution of events
+  event.dist <- extreme.events.distribution(input,prob.value)
+
+  # Run length distribution
+  runlength <- runlength.dist(input,prob.value)
+
+  # Quantile extreme values 
+  qnt.values <- quantile.extreme.values(input,prob.value)
+
+  # Yearly distribution of extreme event dates
+  yearly.exevent <- yearly.exevent.dist(input,prob.value)
+
+  #---------------------
+  # Compiling the output
+  #---------------------
+  output <- lower.tail <- upper.tail <- list()
+  # Compiling lower tail and upper tail separately
+  # Lower tail
+  lower.tail$data <- list(left.all,left.tail.clustered,
+                          left.tail.unclustered)
+  names(lower.tail$data) <- c("All","Clustered","Unclustered")
+  lower.tail$extreme.event.distribution <- event.dist$lower.tail
+  lower.tail$runlength <- runlength$lower.tail
+  lower.tail$quantile.values <- qnt.values$lower.tail
+  lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail
+  # Upper tail
+  upper.tail$data <- list(right.all,right.tail.clustered,
+                          right.tail.unclustered)
+  names(upper.tail$data) <- c("All","Clustered","Unclustered")
+  upper.tail$extreme.event.distribution <- event.dist$upper.tail
+  upper.tail$runlength <- runlength$upper.tail
+  upper.tail$quantile.values <- qnt.values$upper.tail
+  upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail
+  # Output
+  output$data.summary <- summ.st
+  output$lower.tail <- lower.tail
+  output$upper.tail <- upper.tail
+  return(output)
+}
+
+########################################
+# Functions used for formatting clusters
+########################################
+#------------------------
+# Categorzing tail events
+# for ES analysis
+#------------------------
+# Generates returns for the series
+# Mark left tail, right tail events
+gen.data <- function(d,probvalue,value="nonreturns"){
+  res <- data.frame(dates=index(d),value=coredata(d))
+  if(value=="returns"){
+    res$returns <- c(NA,coredata(diff(log(d))*100))
+  }else{
+    res$returns <- d
+  }
+  pval <- c(probvalue/100,(1-(probvalue/100)))
+  pval <- quantile(res$returns,prob=pval,na.rm=TRUE)
+  res$left.tail <- as.numeric(res$returns < pval[1])
+  res$right.tail <- as.numeric(res$returns > pval[2])
+  res$both.tails <- res$left.tail + res$right.tail
+  res <- res[complete.cases(res),]
+  if(value=="returns"){
+    return(res[-1,])
+  }else{
+    return(res)
+  }
+}
+
+
+#-------------------
+# Summarise patterns
+summarise.rle <- function(oneseries){
+  tp <- rle(oneseries)
+  tp1 <- data.frame(tp$lengths,tp$values)
+  tp1 <- subset(tp1,tp1[,2]==1)
+  summary(tp1[,1])
+}
+
+# Summarise the pattern of cluster
+summarise.cluster <- function(obj){
+  rle.both <- summarise.rle(obj$both.tail)
+  rle.left <- summarise.rle(obj$left.tail)
+  rle.right <- summarise.rle(obj$right.tail)
+  rbind(both=rle.both,left=rle.left,right=rle.right)
+}
+  
+# Getting location for the length
+exact.pattern.location <- function(us,pt,pt.len){
+  st <- rle(us)
+  len <- st$length
+  loc.cs <- cumsum(st$length)
+  loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1
+  return(loc)
+}
+
+# Identify and mark mixed clusters
+identify.mixedclusters <- function(m,j){
+  m$remove.mixed <- 0
+  rownum <- which(m$pattern==TRUE)
+  for(i in 1:length(rownum)){
+    nextnum <- rownum[i]+j-1
+    twonums <- m$returns[c(rownum[i]:nextnum)] > 0
+    if(sum(twonums)==j || sum(twonums)==0){
+        next
+      }else{
+        m$remove.mixed[c(rownum[i]:nextnum)] <- 5
+      }
+  }
+  m
+}
+
+#--------------------
+# Formatting clusters
+#--------------------
+# This function takes does the following transformation:
+#----------------------------------------------------
+# What the function does?
+# i.   Get extreme events from event.series
+# ii.  Remove all the mixed clusters
+# iii. Get different types cluster
+# iv.  Further club the clusters for event series and
+#      corresponding response series to get
+#      clustered returns
+# v.   Throw the output in timeseries format
+#----------------------------------------------------
+# Input for the function
+#    event.series = Series in levels or returns on events
+#                   is to be defined
+# response.series = Series in levels or returns on which
+#                   response is to be generated
+#      prob.value = Tail value for defining an event
+#    event.value  = What value is to be studied
+#                   returns or levels
+# Similarly for response.value
+#----------------------------------------------------
+# Output = Formatted clusters in time series format
+#----------------------------------------------------
+get.clusters.formatted <- function(event.series,
+                                   response.series,
+                                   probvalue=5,
+                                   event.value="returns",
+                                   response.value="returns"){
+   # Getting levels in event format
+  tmp <- gen.data(event.series,
+                  probvalue=probvalue,
+                  value=event.value)
+  res.ser <- gen.data(response.series,
+                      probvalue=probvalue,
+                      value=response.value)
+  # Storing old data points
+  tmp.old <- tmp
+
+  # Get pattern with maximum length
+  res <- summarise.cluster(tmp)
+  max.len <- max(res[,"Max."])
+
+  #------------------------
+  # Removing mixed clusters
+  #------------------------
+  for(i in max.len:2){
+    which.pattern <- rep(1,i)
+    patrn <- exact.pattern.location(tmp$both.tails,1,i)
+    # If pattern does not exist move to next pattern
+    if(length(patrn)==0){next}
+    tmp$pattern <- FALSE
+    tmp$pattern[patrn] <- TRUE
+    tmp <- identify.mixedclusters(m=tmp,i)
+    me <- length(which(tmp$remove.mixed==5))
+    
+    if(me!=0){
+      tmp <- tmp[-which(tmp$remove.mixed==5),]
+      cat("Pattern of:",i,";",
+          "Discarded event:",me/i,"\n")
+    }
+  }
+  tmp.nc <- tmp
+
+  # Merging event and response series
+  tmp.es <- xts(tmp[,-1],as.Date(tmp$dates))
+  tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates))
+  tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")],
+                 all=F)
+  
+  # Formatting 
+  if(event.value=="returns"){
+    which.value <- event.value
+  }else{
+    which.value <- "value"
+  }
+  # Converting to data.frame
+  temp <- as.data.frame(tmp.m)
+  temp$dates <- rownames(temp)
+  n <- temp
+  # Get pattern with maximum length
+  res <- summarise.cluster(temp)
+  max.len <- max(res[,"Max."])
+  cat("Maximum length after removing mixed clusters is",
+      max.len,"\n")
+  # Marking clusters
+  n$cluster.pattern <- n$both.tails
+  for(pt.len in max.len:1){
+    mark <- exact.pattern.location(n$both.tails,1,pt.len)
+    if(length(mark)==0){next}
+    n$cluster.pattern[mark] <- pt.len
+  }
+  
+  #-------------------
+  # Clustering returns
+  #-------------------
+  print("Clustering events.")
+  for(pt.len in max.len:2){
+    rownum <- exact.pattern.location(n$both.tails,1,pt.len)
+    # If pattern does not exist
+    if(length(rownum)==0){
+      cat("Pattern",pt.len,"does not exist.","\n");next
+    }
+    # Clustering
+    while(length(rownum)>0){
+      prevnum <- rownum[1]-1
+      lastnum <- rownum[1]+pt.len-1
+    # Clustering event series
+      if(event.value=="returns"){
+        newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum]
+        n[rownum[1],c("value","returns")] <-  c(n$value[lastnum],newreturns)
+      }else{
+        newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T)
+        n[rownum[1],c("value","returns")] <-  c(n$value[lastnum],newreturns)
+      }
+    # Clustering response series
+      if(response.value=="returns"){
+        newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum]
+        n[rownum[1],c("value.1","returns.1")] <-  c(n$value.1[lastnum],newreturns.rs)
+      }else{
+        newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T)
+        n[rownum[1],c("value.1","returns.1")] <-  c(n$value.1[lastnum],newreturns)
+      }
+      n <- n[-c((rownum[1]+1):lastnum),]
+      rownum <- exact.pattern.location(n$both.tails,1,pt.len)
+    }
+  }
+  # Columns to keep
+  cn <- c(which.value,"left.tail","right.tail",
+          "returns.1","cluster.pattern")
+  tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates))
+  colnames(tmp.ts) <- c("event.series","left.tail","right.tail",
+                        "response.series","cluster.pattern")
+
+  # Results
+  return(tmp.ts)
+}
+
+##############################
+# Summary statistics functions
+##############################
+#---------------------------------------------
+# Table 1: Summary statistics
+# INPUT: Time series data-set for which
+#        summary statistics is to be estimated
+# OUTPUT: A data frame with:
+# - Values: "Minimum", 5%,"25%","Median",
+#           "Mean","75%","95%","Maximum",
+#           "Standard deviation","IQR",
+#           "Observations"
+#----------------------------------------------
+sumstat <- function(input){
+  no.var <- NCOL(input)
+  if(no.var==1){input <- xts(input)}
+  # Creating empty frame: chassis
+  tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input)))
+  colnames(tmp) <-  "summary" 
+  rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%",
+                         "Max","sd","IQR","Obs.")
+  # Estimating summary statistics
+  tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)})
+  tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)})
+  tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)})
+  tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)})
+  tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)})
+  tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)})
+  tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)})
+  tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)})
+  tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)})
+  tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)})
+  tmp[11,] <- apply(input,2,function(x){NROW(x)})
+  tmp <- round(tmp,2)
+
+  return(tmp)
+}
+
+######################
+# Yearly summary stats
+######################
+#----------------------------
+# INPUT:
+# 'input': Data series for which event cluster distribution
+#        is to be calculated;
+# 'prob.value': Probility value for which tail is to be constructed this
+#       value is equivalent to one side tail for eg. if prob.value=5
+#       then we have values of 5% tail on both sides
+# Functions used: yearly.exevent.summary()
+# OUTPUT:
+# Yearly distribution of extreme events
+#----------------------------
+yearly.exevent.dist <- function(input, prob.value){
+  no.var <- NCOL(input)
+  mylist <- list()
+  # Estimating cluster count
+  #--------------------
+  # Formatting clusters
+  #--------------------
+  tmp <- get.clusters.formatted(event.series=input,
+                                response.series=input,
+                                probvalue=prob.value,
+                                event.value="nonreturns",
+                                response.value="nonreturns")
+
+  tmp.res <- yearly.exevent.summary(tmp)
+  tmp.res[is.na(tmp.res)] <- 0
+  # Left and right tail
+  lower.tail.yearly.exevent <- tmp.res[,1:2]
+  upper.tail.yearly.exevent <- tmp.res[,3:4]
+  output <- list()
+  output$lower.tail <- lower.tail.yearly.exevent
+  output$upper.tail <- upper.tail.yearly.exevent
+  mylist <- output
+
+  return(mylist)
+}
+
+#------------------------------------------------
+# Get yearly no. and median for good and bad days
+#------------------------------------------------
+yearly.exevent.summary <- function(tmp){
+  tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
+  tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
+  # Bad days
+  tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
+  tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
+  index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
+  # Good days
+  tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
+  tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
+    index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
+  tmp.res <- merge(tmp.bad.y,tmp.good.y)
+  colnames(tmp.res) <- c("number.lowertail","median.lowertail",
+                         "number.uppertail","median.uppertail")
+  output <- as.data.frame(tmp.res)
+  cn <- rownames(output)
+  rownames(output) <- sapply(rownames(output),
+                             function(x)substr(x,nchar(x)-3,nchar(x)))
+  return(output)
+}
+
+#############################
+# Getting event segregation
+# - clustered and unclustered
+#############################
+#----------------------------
+# INPUT:
+# 'input': Data series for which event cluster distribution
+#        is to be calculated;
+# Note: The input series expects the input to be in levels not in returns,
+#       if the some the inputs are already in return formats one has to
+#       use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+#       return series in the data-set
+# 'prob.value': Probility value for which tail is to be constructed this
+#       value is equivalent to one side tail for eg. if prob.value=5
+#       then we have values of 5% tail on both sides
+# Functions used: get.event.count()
+# OUTPUT:
+# Distribution of extreme events
+#----------------------------
+
+extreme.events.distribution <- function(input,prob.value){
+  # Creating an empty frame
+  no.var <- NCOL(input)
+  lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+  upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+  colnames(lower.tail.dist) <- c("Unclustered","Used clusters",
+                                 "Removed clusters","Total clusters",
+                                 "Total","Total used clusters")
+  rownames(lower.tail.dist) <- colnames(input)
+  colnames(upper.tail.dist) <- c("Unclustered","Used clusters",
+                                 "Removed clusters","Total clusters",
+                                 "Total","Total used clusters")
+  rownames(upper.tail.dist) <- colnames(input)
+  # Estimating cluster count
+  #--------------
+  # Cluster count
+  #--------------
+  # Non-returns (if it is already in return format)
+  tmp <- get.event.count(input,probvalue=prob.value,
+                         value="nonreturns")
+  lower.tail.dist  <- tmp[1,]
+  upper.tail.dist  <- tmp[2,]
+
+  #-----------------------------
+  # Naming the tail distribution
+  #-----------------------------
+  mylist <- list(lower.tail.dist,upper.tail.dist)
+  names(mylist) <- c("lower.tail", "upper.tail")
+  return(mylist)
+}
+
+# Functions used in event count calculation
+get.event.count <- function(series,
+                            probvalue=5,
+                            value="returns"){
+  # Extracting dataset
+  tmp.old <- gen.data(series,probvalue,value)
+  tmp <- get.clusters.formatted(event.series=series,
+                                response.series=series,
+                                probvalue,
+                                event.value=value,
+                                response.value=value)
+  
+  cp <- tmp[,"cluster.pattern"]
+  lvl <- as.numeric(levels(as.factor(cp)))
+  lvl.use <- lvl[which(lvl>1)]
+  # Calculating Total events
+  tot.ev.l <- length(which(tmp.old[,"left.tail"]==1))
+  tot.ev.r <- length(which(tmp.old[,"right.tail"]==1))
+  # Calculating Unclustered events
+  un.clstr.l <- length(which(tmp[,"left.tail"]==1 &
+                             tmp[,"cluster.pattern"]==1))
+  un.clstr.r <- length(which(tmp[,"right.tail"]==1 &
+                             tmp[,"cluster.pattern"]==1))
+  # Calculating Used clusters
+  us.cl.l <- us.cl.r <- NULL
+  for(i in 1:length(lvl.use)){
+    tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
+                         tmp[,"left.tail"]==1))*lvl.use[i]
+    tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
+                         tmp[,"right.tail"]==1))*lvl.use[i]
+    us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE)
+    us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE)
+  }
+
+  # Making a table
+  tb <- data.frame(matrix(NA,2,6))
+  colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","tot","tot.used")
+  rownames(tb) <- c("lower","upper")
+  tb[,"tot"] <- c(tot.ev.l,tot.ev.r)
+  tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r)
+  tb[,"used.clstr"] <- c(us.cl.l,us.cl.r)
+  tb[,"tot.used"] <- tb$unclstr+tb$used.clstr
+  tb[,"tot.clstr"] <- tb$tot-tb$unclstr
+  tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr
+
+  return(tb)
+}
+
+####################################
+# Quantile values for extreme events
+####################################
+#-----------------------------------
+# INPUT:
+# 'input': Data series in time series format
+# Note: The input series expects the input to be in levels not in returns,
+#       if the some the inputs are already in return formats one has to
+#       use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+#       return series in the data-set
+# Functions used: get.clusters.formatted()
+# OUTPUT:
+# Lower tail and Upper tail quantile values
+#-----------------------------------
+quantile.extreme.values <- function(input, prob.value){
+  # Creating an empty frame
+  no.var <- NCOL(input)
+  lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+  upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+  colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
+                                      "Mean")
+  rownames(lower.tail.qnt.value) <- "extreme.events"
+  colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
+                                      "Mean")
+  rownames(upper.tail.qnt.value) <- "extreme.events"
+  # Estimating cluster count
+  #--------------------
+  # Formatting clusters
+  #--------------------
+  tmp <- get.clusters.formatted(event.series=input,
+                                response.series=input,
+                                probvalue=prob.value,
+                                event.value="nonreturns",
+                                response.value="nonreturns")
+
+  # Left tail
+  tmp.left.tail <- tmp[which(tmp$left.tail==1),
+                       "event.series"]
+  df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1))))
+  tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2)
+  rownames(tmp.left) <- "extreme.events"
+  colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean")
+  # Right tail
+  tmp.right.tail <- tmp[which(tmp$right.tail==1),
+                        "event.series"]
+  df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1))))
+  tmp.right <- round(cbind(df.right,
+                           mean(tmp.right.tail)),2)
+  rownames(tmp.right) <- "extreme.events"
+  colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean")
+  
+  lower.tail.qnt.value  <- tmp.left 
+  upper.tail.qnt.value  <- tmp.right
+
+  mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value)
+  names(mylist) <- c("lower.tail", "upper.tail")
+  return(mylist)
+}
+
+##########################
+# Run length distribution
+##########################
+#-----------------------------------
+# INPUT:
+# 'input': Data series in time series format
+# Note: The input series expects the input to be in levels not in returns,
+#       if the some the inputs are already in return formats one has to
+#       use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+#       return series in the data-set
+# Functions used: get.clusters.formatted()
+#                 get.cluster.distribution()
+#                 numbers2words()
+# OUTPUT:
+# Lower tail and Upper tail Run length distribution
+#-----------------------------------
+runlength.dist <- function(input, prob.value){
+
+   # Creating an empty frame
+  no.var <- NCOL(input)
+  
+  # Finding maximum Run length
+  # Seed value
+  max.runlength <- 0 
+  #---------------------------
+  # Estimating max. Run length
+  #---------------------------
+  tmp <- get.clusters.formatted(event.series=input,
+                                response.series=input,
+                                probvalue=prob.value,
+                                event.value="nonreturns",
+                                response.value="nonreturns")
+
+  tmp.runlength <- get.cluster.distribution(tmp,"event.series")
+  max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)]))
+  
+  # Generating empty frame
+  col.names <- seq(2:max.runlength)+1
+  lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+                                            ncol=length(col.names)))
+  upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+                                            ncol=length(col.names)))
+  colnames(lower.tail.runlength) <- col.names
+  rownames(lower.tail.runlength) <- "clustered.events"
+  colnames(upper.tail.runlength) <- col.names
+  rownames(upper.tail.runlength) <- "clustered.events"
+
+  #----------------------
+  # Run length estimation
+  #----------------------
+  tmp.res <- get.cluster.distribution(tmp,"event.series")
+  for(j in 1:length(colnames(tmp.res))){
+    col.number <- colnames(tmp.res)[j]
+    lower.tail.runlength[1,col.number] <- tmp.res[1,col.number]
+    upper.tail.runlength[1,col.number] <- tmp.res[2,col.number]
+  }
+  
+  # Replacing NA's with zeroes
+  lower.tail.runlength[is.na(lower.tail.runlength)] <- 0
+  upper.tail.runlength[is.na(upper.tail.runlength)] <- 0
+
+  # creating column names
+  word.cn <- NULL
+  for(i in 1:length(col.names)){
+    word.cn[i] <- numbers2words(col.names[i])
+  }
+  colnames(lower.tail.runlength) <- word.cn
+  colnames(upper.tail.runlength) <- word.cn
+  mylist <- list(lower.tail.runlength,upper.tail.runlength)
+  names(mylist) <- c("lower.tail", "upper.tail")
+  return(mylist) 
+}
+
+#-------------------------
+# Get cluster distribution
+#-------------------------
+# Input for this function is the output of get.cluster.formatted
+get.cluster.distribution <- function(tmp,variable){
+  # Extract cluster category 
+  cp <- tmp[,"cluster.pattern"]
+  lvl <- as.numeric(levels(as.factor(cp)))
+  lvl.use <- lvl[which(lvl>1)]
+  # Get numbers for each category
+  tb <- data.frame(matrix(NA,2,length(lvl.use)))
+  colnames(tb) <- as.character(lvl.use)
+  rownames(tb) <- c(paste(variable,":lower tail"),
+                    paste(variable,":upper tail"))
+  for(i in 1:length(lvl.use)){
+    tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
+                            & tmp[,"left.tail"]==1))
+    tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
+                            & tmp[,"right.tail"]==1))
+    
+  }
+  return(tb)
+}
+
+#----------------------------
+# Converting numbers to words
+#----------------------------
+numbers2words <- function(x){
+  helper <- function(x){
+    digits <- rev(strsplit(as.character(x), "")[[1]])
+    nDigits <- length(digits)
+    if (nDigits == 1) as.vector(ones[digits])
+    else if (nDigits == 2)
+      if (x <= 19) as.vector(teens[digits[1]])
+      else trim(paste(tens[digits[2]],
+                      Recall(as.numeric(digits[1]))))
+    else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred",
+               Recall(makeNumber(digits[2:1]))))
+    else {
+      nSuffix <- ((nDigits + 2) %/% 3) - 1
+      if (nSuffix > length(suffixes)) stop(paste(x, "is too large!"))
+      trim(paste(Recall(makeNumber(digits[
+                                          nDigits:(3*nSuffix + 1)])),
+                 suffixes[nSuffix],
+                 Recall(makeNumber(digits[(3*nSuffix):1]))))
+    }
+  }
+  trim <- function(text){
+    gsub("^\ ", "", gsub("\ *$", "", text))
+  }
+  makeNumber <- function(...) as.numeric(paste(..., collapse=""))
+  opts <- options(scipen=100)
+  on.exit(options(opts))
+  ones <- c("", "one", "two", "three", "four", "five", "six", "seven",
+            "eight", "nine")
+  names(ones) <- 0:9
+  teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+             "sixteen", " seventeen", "eighteen", "nineteen")
+  names(teens) <- 0:9
+  tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy",
+            "eighty",
+            "ninety")
+  names(tens) <- 2:9
+  x <- round(x)
+  suffixes <- c("thousand", "million", "billion", "trillion")
+  if (length(x) > 1) return(sapply(x, helper))
+  helper(x)
+}
+
+##########################
+# Extreme event study plot
+##########################
+# This function generates event study plot for clustered and un-clustered data
+#-------------------------
+# Input for the function
+#                  z   = Data object with both the series response.series and event.series
+# response.series.name = Column name of the series for which response is observed
+#    event.series.name = Column name of the series on which event is observed
+#          titlestring = Title string for the event study plot
+#                 ylab = Y - axis label
+#                width = width of event window for event study plot
+#           prob.value = Probability value for which extreme events is determined
+#-------------------------
+eesPlot <- function(z, response.series.name,
+                    event.series.name,
+                    titlestring, ylab, width=5,
+                    prob.value=5){
+  #-----------------
+  # Get event dates
+  #-----------------
+  # Get both clustered and unclustered dates
+  e.s <- z[,event.series.name]
+  r.s <- z[,response.series.name]
+  data.use <- get.clusters.formatted(event.series=e.s,
+                                      response.series=r.s,
+                                      probvalue=prob.value,
+                                      event.value="nonreturns",
+                                      response.value="nonreturns",
+                                      result="series")
+  # Get only unclustered data
+  data.frmt <- data.use[which(data.use$cluster.pattern==1),]
+  data.frmt2 <- data.use[which(data.use$cluster.pattern!=0),]
+
+  # get dates for bigdays and baddays
+  baddays.normal <- index(data.frmt[which(data.frmt[,"left.tail"]==1)])
+  bigdays.normal <- index(data.frmt[which(data.frmt[,"right.tail"]==1)])
+  baddays.purged <- index(data.frmt2[which(data.frmt2[,"left.tail"]==1)])
+  bigdays.purged <- index(data.frmt2[which(data.frmt2[,"right.tail"]==1)])
+
+  d.good.normal <- bigdays.normal
+  d.bad.normal <- baddays.normal
+  d.good.purged <- bigdays.purged
+  d.bad.purged <- baddays.purged
+  
+  # ES for normal returns
+  es.good.normal <- corecomp(data.use,d.good.normal,
+                             "response.series",width)
+  es.bad.normal <- corecomp(data.use,d.bad.normal,
+                            "response.series",width)
+  
+  # ES for purged returns
+  es.good.purged <- corecomp(data.use,d.good.purged,
+                             "response.series",width)
+  es.bad.purged <- corecomp(data.use,d.bad.purged,
+                            "response.series",width)
+  
+  big.normal <- max(abs(cbind(es.good.normal,es.bad.normal)))
+  big.purged <- max(abs(cbind(es.good.purged,es.bad.purged)))
+  big <- max(big.normal,big.purged)
+  hilo1 <- c(-big,big)
+  
+  #---------------
+  # Plotting graph
+  plot.es.graph.both(es.good.normal,es.bad.normal,
+                     es.good.purged,es.bad.purged,
+                     width,titlestring,ylab)
+}
+#--------------------------
+# Eventstudy analysis
+# -using eventstudy package
+#--------------------------
+corecomp <- function(z,dlist,seriesname,width) {
+  events <- data.frame(unit=rep(seriesname, length(dlist)), when=dlist)
+  es.results <- phys2eventtime(z, events, width=0)
+  es.w <- window(es.results$z.e, start=-width, end=+width)
+  # Replaing NA's with zeroes
+  es.w[is.na(es.w)] <- 0
+  es.w <- remap.cumsum(es.w, is.pc=FALSE, base=0)
+  inference.Ecar(es.w)
+}
+ 
+#----------------------------------
+# Plotting graph in es.error.metric
+#----------------------------------
+plot.es.graph.both <- function(es.good.normal,es.bad.normal,
+                               es.good.purged,es.bad.purged,
+                               width,titlestring,ylab){
+  big.normal <- max(abs(cbind(es.good.normal,es.bad.normal)))
+  big.purged <- max(abs(cbind(es.good.purged,es.bad.purged)))
+  big <- max(big.normal,big.purged)
+  hilo1 <- c(-big,big)
+
+  # Plotting graph
+  par(mfrow=c(1,2))
+  
+  # Plot very good days
+  plot(-width:width, es.good.normal[,2], type="l", lwd=2, ylim=hilo1, col="red",
+       xlab="Event time (days)", ylab=ylab,
+       main=paste("Very good", " (by ", titlestring, ")", sep=""))
+  lines(-width:width, es.good.purged[,2], lwd=2, lty=1,type="l", col="orange")
+  points(-width:width, es.good.normal[,2], pch=19,col="red")
+  points(-width:width, es.good.purged[,2], pch=25,col="orange")
+  lines(-width:width, es.good.normal[,1], lwd=0.8, lty=2, col="red")
+  lines(-width:width, es.good.normal[,3], lwd=0.8, lty=2, col="red")
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/eventstudies -r 60


More information about the Eventstudies-commits mailing list