[Eventstudies-commits] r325 - in pkg: . R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Wed May 14 12:29:02 CEST 2014


Author: vikram
Date: 2014-05-14 12:29:01 +0200 (Wed, 14 May 2014)
New Revision: 325

Added:
   pkg/R/eesInference.R
   pkg/man/eesDates.Rd
   pkg/man/eesInference.Rd
   pkg/man/eesSummary.Rd
   pkg/man/get.clusters.formatted.Rd
Removed:
   pkg/R/ees.R
Modified:
   pkg/NAMESPACE
   pkg/man/ees.Rd
Log:
Added new ees functions to NAMESPACE; added documentation for the same

Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE	2014-05-12 14:53:23 UTC (rev 324)
+++ pkg/NAMESPACE	2014-05-14 10:29:01 UTC (rev 325)
@@ -1,6 +1,6 @@
 export(eventstudy, inference.bootstrap, inference.wilcox, phys2eventtime,
        remap.cumsum, remap.cumprod, remap.event.reindex, eesSummary, eesDates, 
-	eesInference)
+	eesInference, get.clusters.formatted)
 
 export(marketResidual,
        excessReturn)

Deleted: pkg/R/ees.R
===================================================================
--- pkg/R/ees.R	2014-05-12 14:53:23 UTC (rev 324)
+++ pkg/R/ees.R	2014-05-14 10:29:01 UTC (rev 325)
@@ -1,825 +0,0 @@
-
-# Total 16 functions
-############################
-# Identifying extreme events
-############################
-
-#----------------------------------------------------------------
-# INPUT:
-# 'input'     : Output of get.clusters.formatted
-# 'prob.value': This is the tail value for which event is
-#               to be defined. For eg: prob.value=5 will
-#               consider 5% tail on both sides
-#-----------------------------------------------------------------
-# OUTPUT:
-# Result will be in a list of 3 with following tables:
-# 1. Summary statistics
-#    a. Summary of whole data-set
-# 2. Lower tail: Extreme event tables
-#    a. Distribution of extreme events
-#    b. Run length distribution
-#    c. Quantile values
-#    d. Yearly distribution
-#    e. Extreme event data
-#     - Clustered, Un-clustered and Both
-# 3. Upper tail: Extreme event tables
-#    a. Distribution of extreme events
-#    b. Run length distribution
-#    c. Quantile values
-#    d. Yearly distribution
-#    e. Extreme event data
-#     - Clustered, Un-clustered and Both
-#------------------------------------------------------------------
-# NOTE:
-eesSummary <- function(x, ...){
-  no.var <- NCOL(input)
-
-  #---------------------
-  # Extreme event output
-  #---------------------
-  # Summary statistics
-  summ.st <- attr(input,"sumstat")
-  colnames(summ.st) <- NULL
-  summ.st <- t(summ.st)
-
-  # Distribtution of events
-  event.dist <- attr(input,"extreme.events.distribution")
-
-  # Run length distribution
-  runlength <- runlength.dist(input)
-
-  # Quantile extreme values 
-  qnt.values <- quantile.extreme.values(input)
-  
-  # Yearly distribution of extreme event dates
-  yearly.exevent <- yearly.exevent.dist(input)
-
-  #---------------------
-  # Compiling the output
-  #---------------------
-  output <- lower.tail <- upper.tail <- list()
-  # Compiling lower tail and upper tail separately
-  # Lower tail
-  lower.tail$extreme.event.distribution <- event.dist$lower.tail
-  lower.tail$runlength <- runlength$lower.tail
-  lower.tail$quantile.values <- qnt.values$lower.tail
-  lower.tail$yearly.extreme.event <- round(t(yearly.exevent$lower.tail),2)
-  # Upper tail
-  upper.tail$extreme.event.distribution <- event.dist$upper.tail
-  upper.tail$runlength <- runlength$upper.tail
-  upper.tail$quantile.values <- qnt.values$upper.tail
-  upper.tail$yearly.extreme.event <- round(t(yearly.exevent$upper.tail),2)
-  # Output
-  output$data.summary <- summ.st
-  output$lower.tail <- lower.tail
-  output$upper.tail <- upper.tail
-  return(output)
-}
-
-########################################
-# Functions used for formatting clusters
-########################################
-#------------------------
-# Categorzing tail events
-# for ES analysis
-#------------------------
-# Generates returns for the series
-# Mark left tail, right tail events
-gen.data <- function(d,probvalue,value="nonreturns"){
-  res <- data.frame(dates=index(d),value=coredata(d))
-  if(value=="returns"){
-    res$returns <- c(NA,coredata(diff(log(d))*100))
-  }else{
-    res$returns <- d
-  }
-  pval <- c(probvalue/100,(1-(probvalue/100)))
-  pval <- quantile(res$returns,prob=pval,na.rm=TRUE)
-  res$left.tail <- as.numeric(res$returns < pval[1])
-  res$right.tail <- as.numeric(res$returns > pval[2])
-  res$both.tails <- res$left.tail + res$right.tail
-  res <- res[complete.cases(res),]
-  if(value=="returns"){
-    return(res[-1,])
-  }else{
-    return(res)
-  }
-}
-
-
-#-------------------
-# Summarise patterns
-summarise.rle <- function(oneseries){
-  tp <- rle(oneseries)
-  tp1 <- data.frame(tp$lengths,tp$values)
-  tp1 <- subset(tp1,tp1[,2]==1)
-  summary(tp1[,1])
-}
-
-# Summarise the pattern of cluster
-summarise.cluster <- function(obj){
-  rle.both <- summarise.rle(obj$both.tail)
-  rle.left <- summarise.rle(obj$left.tail)
-  rle.right <- summarise.rle(obj$right.tail)
-  rbind(both=rle.both,left=rle.left,right=rle.right)
-}
-  
-# Getting location for the length
-exact.pattern.location <- function(us,pt,pt.len){
-  st <- rle(us)
-  len <- st$length
-  loc.cs <- cumsum(st$length)
-  loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1
-  return(loc)
-}
-
-# Identify and mark mixed clusters
-identify.mixedclusters <- function(m,j){
-  m$remove.mixed <- 0
-  rownum <- which(m$pattern==TRUE)
-  for(i in 1:length(rownum)){
-    nextnum <- rownum[i]+j-1
-    twonums <- m$returns[c(rownum[i]:nextnum)] > 0
-    if(sum(twonums)==j || sum(twonums)==0){
-        next
-      }else{
-        m$remove.mixed[c(rownum[i]:nextnum)] <- 5
-      }
-  }
-  m
-}
-
-#--------------------
-# Formatting clusters
-#--------------------
-# This function takes does the following transformation:
-#----------------------------------------------------
-# What the function does?
-# i.   Get extreme events from event.series
-# ii.  Remove all the mixed clusters
-# iii. Get different types cluster
-# iv.  Further club the clusters for event series and
-#      corresponding response series to get
-#      clustered returns
-# v.   Throw the output in timeseries format
-#----------------------------------------------------
-# Input for the function
-#    event.series = Series in levels or returns on events
-#                   is to be defined
-# response.series = Series in levels or returns on which
-#                   response is to be generated
-#      prob.value = Tail value for defining an event
-#    event.value  = What value is to be studied
-#                   returns or levels
-# Similarly for response.value
-#----------------------------------------------------
-# Output = Formatted clusters in time series format
-#----------------------------------------------------
-get.clusters.formatted <- function(event.series,
-                                   response.series,
-                                   probvalue=5,
-                                   event.value="nonreturns",
-                                   response.value="nonreturns"){
-   # Getting levels in event format
-  tmp <- gen.data(event.series,
-                  probvalue=probvalue,
-                  value=event.value)
-  res.ser <- gen.data(response.series,
-                      probvalue=probvalue,
-                      value=response.value)
-  # Storing old data points
-  tmp.old <- tmp
-
-  # Get pattern with maximum length
-  res <- summarise.cluster(tmp)
-  max.len <- max(res[,"Max."])
-
-  #------------------------
-  # Removing mixed clusters
-  #------------------------
-  for(i in max.len:2){
-    which.pattern <- rep(1,i)
-    patrn <- exact.pattern.location(tmp$both.tails,1,i)
-    # If pattern does not exist move to next pattern
-    if(length(patrn)==0){next}
-    tmp$pattern <- FALSE
-    tmp$pattern[patrn] <- TRUE
-    tmp <- identify.mixedclusters(m=tmp,i)
-    me <- length(which(tmp$remove.mixed==5))
-    
-    if(me!=0){
-      tmp <- tmp[-which(tmp$remove.mixed==5),]
-      cat("Pattern of:",i,";",
-          "Discarded event:",me/i,"\n")
-    }
-  }
-  tmp.nc <- tmp
-
-  # Merging event and response series
-  tmp.es <- xts(tmp[,-1],as.Date(tmp$dates))
-  tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates))
-  tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")],
-                 all=F)
-  
-  # Formatting 
-  if(event.value=="returns"){
-    which.value <- event.value
-  }else{
-    which.value <- "value"
-  }
-  # Converting to data.frame
-  temp <- as.data.frame(tmp.m)
-  temp$dates <- rownames(temp)
-  n <- temp
-  # Get pattern with maximum length
-  res <- summarise.cluster(temp)
-  max.len <- max(res[,"Max."])
-  cat("Maximum length after removing mixed clusters is",
-      max.len,"\n")
-  # Marking clusters
-  n$cluster.pattern <- n$both.tails
-  for(pt.len in max.len:1){
-    mark <- exact.pattern.location(n$both.tails,1,pt.len)
-    if(length(mark)==0){next}
-    n$cluster.pattern[mark] <- pt.len
-  }
-  
-  #-------------------
-  # Clustering returns
-  #-------------------
-  print("Clustering events.")
-  for(pt.len in max.len:2){
-    rownum <- exact.pattern.location(n$both.tails,1,pt.len)
-    # If pattern does not exist
-    if(length(rownum)==0){
-      cat("Pattern",pt.len,"does not exist.","\n");next
-    }
-    # Clustering
-    while(length(rownum)>0){
-      prevnum <- rownum[1]-1
-      lastnum <- rownum[1]+pt.len-1
-    # Clustering event series
-      if(event.value=="returns"){
-        newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum]
-        n[rownum[1],c("value","returns")] <-  c(n$value[lastnum],newreturns)
-      }else{
-        newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T)
-        n[rownum[1],c("value","returns")] <-  c(n$value[lastnum],newreturns)
-      }
-    # Clustering response series
-      if(response.value=="returns"){
-        newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum]
-        n[rownum[1],c("value.1","returns.1")] <-  c(n$value.1[lastnum],newreturns.rs)
-      }else{
-        newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T)
-        n[rownum[1],c("value.1","returns.1")] <-  c(n$value.1[lastnum],newreturns)
-      }
-      n <- n[-c((rownum[1]+1):lastnum),]
-      rownum <- exact.pattern.location(n$both.tails,1,pt.len)
-    }
-  }
-  # Columns to keep
-  cn <- c(which.value,"left.tail","right.tail",
-          "returns.1","cluster.pattern")
-  tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates))
-  colnames(tmp.ts) <- c("event.series","left.tail","right.tail",
-                        "response.series","cluster.pattern")
-
-  # Results
-  attr(tmp.ts, which = "sumstat") <- sumstat(input = event.series)
-  attr(tmp.ts, which = "extreme.events.distribution") <- extreme.events.distribution(input = event.series, gcf.output = tmp.ts, prob.value = probvalue)
-  attr(tmp.ts, which = "probvalue") <- probvalue
-  class(tmp.ts) <- c("ees","zoo")
-  return(tmp.ts)
-}
-
-##############################
-# Summary statistics functions
-##############################
-#---------------------------------------------
-# Table 1: Summary statistics
-# INPUT: Time series data-set for which
-#        summary statistics is to be estimated
-# OUTPUT: A data frame with:
-# - Values: "Minimum", 5%,"25%","Median",
-#           "Mean","75%","95%","Maximum",
-#           "Standard deviation","IQR",
-#           "Observations"
-#----------------------------------------------
-sumstat <- function(input){
-  no.var <- NCOL(input)
-  if(no.var==1){input <- xts(input)}
-  # Creating empty frame: chassis
-  tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input)))
-  colnames(tmp) <-  "summary" 
-  rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%",
-                         "Max","sd","IQR","Obs.")
-  # Estimating summary statistics
-  tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)})
-  tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)})
-  tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)})
-  tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)})
-  tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)})
-  tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)})
-  tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)})
-  tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)})
-  tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)})
-  tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)})
-  tmp[11,] <- apply(input,2,function(x){NROW(x)})
-  tmp <- round(tmp,2)
-
-  return(tmp)
-}
-
-#############################
-# Getting event segregation
-# - clustered and unclustered
-#############################
-#----------------------------
-# INPUT:
-# 'input': Data series for which event cluster distribution
-#        is to be calculated;
-# Note: The input series expects the input to be in levels not in returns,
-#       if the some the inputs are already in return formats one has to
-#       use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-#       return series in the data-set
-# 'prob.value': Probility value for which tail is to be constructed this
-#       value is equivalent to one side tail for eg. if prob.value=5
-#       then we have values of 5% tail on both sides
-# Functions used: get.event.count()
-# OUTPUT:
-# Distribution of extreme events
-#----------------------------
-
-extreme.events.distribution <- function(input, gcf.output, prob.value){
-  # Creating an empty frame
-  no.var <- NCOL(input)
-  lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
-  upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
-  colnames(lower.tail.dist) <- c("Unclustered","Used clusters",
-                                 "Removed clusters","Total clusters",
-                                 "Total","Total used clusters")
-  rownames(lower.tail.dist) <- colnames(input)
-  colnames(upper.tail.dist) <- c("Unclustered","Used clusters",
-                                 "Removed clusters","Total clusters",
-                                 "Total","Total used clusters")
-  rownames(upper.tail.dist) <- colnames(input)
-  # Estimating cluster count
-  #--------------
-  # Cluster count
-  #--------------
-  # Non-returns (if it is already in return format)
-  tmp <- get.event.count(input, gcf.output, probvalue=prob.value,
-                         value="nonreturns")
-  lower.tail.dist  <- tmp[1,]
-  upper.tail.dist  <- tmp[2,]
-
-  #-----------------------------
-  # Naming the tail distribution
-  #-----------------------------
-  mylist <- list(lower.tail.dist,upper.tail.dist)
-  names(mylist) <- c("lower.tail", "upper.tail")
-  return(mylist)
-}
-
-# Functions used in event count calculation
-get.event.count <- function(series,
-                            probvalue=5,
-                            gcf.output,
-                            value="returns"){
-  # Extracting dataset
-  tmp.old <- gen.data(series,probvalue,value)
-  cp <- gcf.output[,"cluster.pattern"]
-  lvl <- as.numeric(levels(as.factor(cp)))
-  lvl.use <- lvl[which(lvl>1)]
-  # Calculating Total events
-  tot.ev.l <- length(which(tmp.old[,"left.tail"]==1))
-  tot.ev.r <- length(which(tmp.old[,"right.tail"]==1))
-  # Calculating Unclustered events
-  un.clstr.l <- length(which(gcf.output[,"left.tail"]==1 &
-                             gcf.output[,"cluster.pattern"]==1))
-  un.clstr.r <- length(which(gcf.output[,"right.tail"]==1 &
-                             gcf.output[,"cluster.pattern"]==1))
-  # Calculating Used clusters
-  us.cl.l <- us.cl.r <- NULL
-  for(i in 1:length(lvl.use)){
-    tmp1 <- length(which(gcf.output[,"cluster.pattern"]==lvl.use[i] &
-                         gcf.output[,"left.tail"]==1))*lvl.use[i]
-    tmp2 <- length(which(gcf.output[,"cluster.pattern"]==lvl.use[i] &
-                         gcf.output[,"right.tail"]==1))*lvl.use[i]
-    us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE)
-    us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE)
-  }
-
-  # Making a table
-  tb <- data.frame(matrix(NA,2,6))
-  colnames(tb) <- c("unclustered.events","used.clustered.events","removed.clustered.events","total.clustered.events","total.events","total.used.events")
-  rownames(tb) <- c("lower","upper")
-  tb[,"total.events"] <- c(tot.ev.l,tot.ev.r)
-  tb[,"unclustered.events"] <- c(un.clstr.l,un.clstr.r)
-  tb[,"used.clustered.events"] <- c(us.cl.l,us.cl.r)
-  tb[,"total.used.events"] <- tb$unclustered.events+tb$used.clustered.events
-  tb[,"total.clustered.events"] <- tb$total.events-tb$unclustered.events
-  tb[,"removed.clustered.events"] <- tb$total.clustered.events-tb$used.clustered.events
-
-  return(tb)
-}
-
-######################
-# Yearly summary stats
-######################
-#----------------------------
-# INPUT:
-# 'input': Output from get.clusters.formatted function
-# 'prob.value': Probility value for which tail is to be constructed this
-#       value is equivalent to one side tail for eg. if prob.value=5
-#       then we have values of 5% tail on both sides
-# Functions used: yearly.exevent.summary()
-# OUTPUT:
-# Yearly distribution of extreme events
-#----------------------------
-yearly.exevent.dist <- function(input){
-  mylist <- list()
-  ## Estimating cluster count  
-  tmp.res <- yearly.exevent.summary(input)
-  tmp.res[is.na(tmp.res)] <- 0
-  ## Left and right tail
-  lower.tail.yearly.exevent <- tmp.res[,1:2]
-  upper.tail.yearly.exevent <- tmp.res[,3:4]
-  output <- list()
-  output$lower.tail <- lower.tail.yearly.exevent
-  output$upper.tail <- upper.tail.yearly.exevent
-  mylist <- output
-  return(mylist)
-}
-
-#------------------------------------------------
-# Get yearly no. and median for good and bad days
-#------------------------------------------------
-yearly.exevent.summary <- function(tmp){
-  tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
-  tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
-  # Bad days
-  tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
-  tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
-  index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
-  # Good days
-  tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
-  tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
-    index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
-  tmp.res <- merge(tmp.bad.y,tmp.good.y)
-  colnames(tmp.res) <- c("total.events.l","median.value.l",
-                         "total.events.u","median.value.u")
-  output <- as.data.frame(tmp.res)
-  cn <- rownames(output)
-  rownames(output) <- sapply(rownames(output),
-                             function(x)substr(x,nchar(x)-3,nchar(x)))
-  return(output)
-}
-
-####################################
-# Quantile values for extreme events
-####################################
-#-----------------------------------
-# INPUT:
-# 'input': Output of get.clusters.formatted
-# Note: The input series expects the input to be in levels not in returns,
-#       if the some the inputs are already in return formats one has to
-#       use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-#       return series in the data-set
-# Functions used: get.clusters.formatted()
-# OUTPUT:
-# Lower tail and Upper tail quantile values
-#-----------------------------------
-quantile.extreme.values <- function(input){
-  # Creating an empty frame
-  lower.tail.qnt.value <- data.frame(matrix(NA,nrow=1,ncol=6))
-  upper.tail.qnt.value <- data.frame(matrix(NA,nrow=1,ncol=6))
-  colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
-                                      "Mean")
-  rownames(lower.tail.qnt.value) <- "extreme.events"
-  colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
-                                      "Mean")
-  rownames(upper.tail.qnt.value) <- "extreme.events"
-  # Estimating cluster count
-  # Left tail
-  tmp.left.tail <- input[which(input$left.tail==1),
-                       "event.series"]
-  df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1))))
-  tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2)
-  rownames(tmp.left) <- "extreme.events"
-  colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean")
-  # Right tail
-  tmp.right.tail <- input[which(input$right.tail==1),
-                        "event.series"]
-  df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1))))
-  tmp.right <- round(cbind(df.right,
-                           mean(tmp.right.tail)),2)
-  rownames(tmp.right) <- "extreme.events"
-  colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean")
-  
-  lower.tail.qnt.value  <- tmp.left 
-  upper.tail.qnt.value  <- tmp.right
-
-  mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value)
-  names(mylist) <- c("lower.tail", "upper.tail")
-  return(mylist)
-}
-
-##########################
-# Run length distribution
-##########################
-#-----------------------------------
-# INPUT:
-# 'input': Data series in time series format
-# Note: The input series expects the input to be in levels not in returns,
-#       if the some the inputs are already in return formats one has to
-#       use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-#       return series in the data-set
-# Functions used: get.clusters.formatted()
-#                 get.cluster.distribution()
-#                 numbers2words()
-# OUTPUT:
-# Lower tail and Upper tail Run length distribution
-#-----------------------------------
-runlength.dist <- function(input){
-
-  # Finding maximum Run length
-  # Seed value
-  max.runlength <- 0 
-  #---------------------------
-  # Estimating max. Run length
-  #---------------------------
-  tmp.runlength <- get.cluster.distribution(input,"event.series")
-  max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)]))
-  
-  # Generating empty frame
-  col.names <- seq(2:max.runlength)+1
-  lower.tail.runlength <- data.frame(matrix(NA,nrow=1,
-                                            ncol=length(col.names)))
-  upper.tail.runlength <- data.frame(matrix(NA,nrow=1,
-                                            ncol=length(col.names)))
-  colnames(lower.tail.runlength) <- col.names
-  rownames(lower.tail.runlength) <- "clustered.events"
-  colnames(upper.tail.runlength) <- col.names
-  rownames(upper.tail.runlength) <- "clustered.events"
-
-  #----------------------
-  # Run length estimation
-  #----------------------
-  tmp.res <- get.cluster.distribution(input,"event.series")
-  for(j in 1:length(colnames(tmp.res))){
-    col.number <- colnames(tmp.res)[j]
-    lower.tail.runlength[1,col.number] <- tmp.res[1,col.number]
-    upper.tail.runlength[1,col.number] <- tmp.res[2,col.number]
-  }
-  
-  # Replacing NA's with zeroes
-  lower.tail.runlength[is.na(lower.tail.runlength)] <- 0
-  upper.tail.runlength[is.na(upper.tail.runlength)] <- 0
-
-  # creating column names
-  word.cn <- NULL
-  for(i in 1:length(col.names)){
-    word.cn[i] <- numbers2words(col.names[i])
-  }
-  colnames(lower.tail.runlength) <- word.cn
-  colnames(upper.tail.runlength) <- word.cn
-  mylist <- list(lower.tail.runlength,upper.tail.runlength)
-  names(mylist) <- c("lower.tail", "upper.tail")
-  return(mylist) 
-}
-
-#-------------------------
-# Get cluster distribution
-#-------------------------
-# Input for this function is the output of get.cluster.formatted
-get.cluster.distribution <- function(tmp,variable){
-  # Extract cluster category 
-  cp <- tmp[,"cluster.pattern"]
-  lvl <- as.numeric(levels(as.factor(cp)))
-  lvl.use <- lvl[which(lvl>1)]
-  # Get numbers for each category
-  tb <- data.frame(matrix(NA,2,length(lvl.use)))
-  colnames(tb) <- as.character(lvl.use)
-  rownames(tb) <- c(paste(variable,":lower tail"),
-                    paste(variable,":upper tail"))
-  for(i in 1:length(lvl.use)){
-    tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
-                            & tmp[,"left.tail"]==1))
-    tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
-                            & tmp[,"right.tail"]==1))
-    
-  }
-  return(tb)
-}
-
-#----------------------------
-# Converting numbers to words
-#----------------------------
-numbers2words <- function(x){
-  helper <- function(x){
-    digits <- rev(strsplit(as.character(x), "")[[1]])
-    nDigits <- length(digits)
-    if (nDigits == 1) as.vector(ones[digits])
-    else if (nDigits == 2)
-      if (x <= 19) as.vector(teens[digits[1]])
-      else trim(paste(tens[digits[2]],
-                      Recall(as.numeric(digits[1]))))
-    else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred",
-               Recall(makeNumber(digits[2:1]))))
-    else {
-      nSuffix <- ((nDigits + 2) %/% 3) - 1
-      if (nSuffix > length(suffixes)) stop(paste(x, "is too large!"))
-      trim(paste(Recall(makeNumber(digits[
-                                          nDigits:(3*nSuffix + 1)])),
-                 suffixes[nSuffix],
-                 Recall(makeNumber(digits[(3*nSuffix):1]))))
-    }
-  }
-  trim <- function(text){
-    gsub("^\ ", "", gsub("\ *$", "", text))
-  }
-  makeNumber <- function(...) as.numeric(paste(..., collapse=""))
-  opts <- options(scipen=100)
-  on.exit(options(opts))
-  ones <- c("", "one", "two", "three", "four", "five", "six", "seven",
-            "eight", "nine")
-  names(ones) <- 0:9
-  teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
-             "sixteen", " seventeen", "eighteen", "nineteen")
-  names(teens) <- 0:9
-  tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy",
-            "eighty",
-            "ninety")
-  names(tens) <- 2:9
-  x <- round(x)
-  suffixes <- c("thousand", "million", "billion", "trillion")
-  if (length(x) > 1) return(sapply(x, helper))
-  helper(x)
-}
-
-##########################
-# Extreme event study plot
-##########################
-# This function generates event study plot for clustered and un-clustered data
-#-------------------------
-# Input for the function
-#                  z   = Data object with both the series response.series and event.series
-# response.series.name = Column name of the series for which response is observed
-#    event.series.name = Column name of the series on which event is observed
-#          titlestring = Title string for the event study plot
-#                 ylab = Y - axis label
-#                width = width of event window for event study plot
-#           prob.value = Probability value for which extreme events is determined
-#-------------------------
-
-######################
-## Extreme event dates
-######################
-## Input:  get.clusters.formatted (GCF) output
-## Output: Extreme Event dates for normal and purged data
-eesDates <- function(input){
-  ##-----------------
-  ## Get event dates
-  ##-----------------
-  ## Get only unclustered data
-  data.only.cluster <- input[which(input$cluster.pattern==1),]
-  data.no.cluster <- input[which(input$cluster.pattern!=0),]
-
-  ## get dates for bigdays and baddays
-  days.bad.normal <- index(data.only.cluster[which(data.only.cluster[,"left.tail"]==1)])
-  days.good.normal <- index(data.only.cluster[which(data.only.cluster[,"right.tail"]==1)])
-  days.bad.purged <- index(data.no.cluster[which(data.no.cluster[,"left.tail"]==1)])
-  days.good.purged <- index(data.no.cluster[which(data.no.cluster[,"right.tail"]==1)])
-  ## Event list
-  events.good.normal <- data.frame(outcome.unit=rep("response.series",
-                                     length(days.good.normal)),
-                                   event.when=days.good.normal)
-  events.bad.normal <- data.frame(outcome.unit=rep("response.series",
-                                     length(days.bad.normal)),
-                                   event.when=days.bad.normal)
-  events.good.purged <- data.frame(outcome.unit=rep("response.series",
-                                     length(days.good.purged)),
-                                   event.when=days.good.purged)
-  events.bad.purged <- data.frame(outcome.unit=rep("response.series",
-                                     length(days.bad.purged)),
-                                   event.when=days.bad.purged)
-  dates <- list(events.good.normal=events.good.normal,
-                events.bad.normal=events.bad.normal,
-                events.good.purged=events.good.purged,
-                events.bad.purged=events.bad.purged)
-  for(i in 1:length(dates)){dates[[i]][,1] <- as.character(dates[[i]][,1])}
-  return(dates)
-}
-
-##----------------------
-## Getting ees inference
-##----------------------
-## Event study plot for EES (extreme event studies)
-## Input: Output of GCF
-## eventLists: Output of eesDates
-eesInference <- function(input, eventLists, to.remap, remap, width,
-                         inference = TRUE, inference.strategy = "bootstrap"){
-  inf <- list()
-  ## Computing inference
-  ## Normal
-                                        # Good days
-  inf$good.normal <- eventstudy(input, eventList=eventLists$events.good.normal,
-                                type="None", to.remap=to.remap,
-                                remap=remap, width=width, inference=inference,
-                                inference.strategy=inference.strategy)
-                                        # Bad days
-  inf$bad.normal <- eventstudy(input, eventList=eventLists$events.bad.normal,
-                                type="None", to.remap=to.remap,
-                                remap=remap, width=width, inference=inference,
-                                inference.strategy=inference.strategy)
-  ## Purged
-                                          # Good days
-  inf$good.purged <- eventstudy(input, eventList=eventLists$events.good.purged,
-                                type="None", to.remap=to.remap,
-                                remap=remap, width=width, inference=inference,
-                                inference.strategy=inference.strategy)
-                                            # Bad days
-  inf$bad.purged <- eventstudy(input, eventList=eventLists$events.bad.purged,
-                                type="None", to.remap=to.remap,
-                                remap=remap, width=width, inference=inference,
-                                inference.strategy=inference.strategy)
-
-  class(inf) <- "ees"
-  return(inf)
-}
-
-plot.ees <- function(x, xlab = NULL, ...){
-  ## assign own labels if they're missing
-  if (is.null(xlab)) {
-      xlab <- "Event time"
-  }
-  ## Inference
-  es.good.normal <- x$good.normal$eventstudy.output
-  es.bad.normal <- x$bad.normal$eventstudy.output
-  es.good.purged <- x$good.purged$eventstudy.output
-  es.bad.purged <- x$bad.purged$eventstudy.output
-                                        # Width
-  width <- (NROW(x[[1]]$eventstudy.output)-1)/2
-  ##---------------
-  ## Plotting graph
-  ##---------------
-  big.normal <- max(abs(cbind(es.good.normal,es.bad.normal)))
-  big.purged <- max(abs(cbind(es.good.purged,es.bad.purged)))
-  big <- max(big.normal,big.purged)
-  ylim.max <- c(-big,big)
-
-  # Plotting graph
-  par(mfrow=c(1,2))
-  
-  # Plot very good days
-  plot(-width:width, es.good.normal[,2], type="l", lwd=2,
-       ylim=ylim.max, col="red", xlab=xlab,
-       main="Very good days", ...)       
-  lines(-width:width, es.good.purged[,2], lwd=2, lty=1,type="l", col="orange")
-  points(-width:width, es.good.normal[,2], pch=19,col="red")
-  points(-width:width, es.good.purged[,2], pch=25,col="orange")
-  lines(-width:width, es.good.normal[,1], lwd=0.8, lty=2, col="red")
-  lines(-width:width, es.good.normal[,3], lwd=0.8, lty=2, col="red")
-  lines(-width:width, es.good.purged[,1], lwd=0.8, lty=4, col="orange")
-  lines(-width:width, es.good.purged[,3], lwd=0.8, lty=4, col="orange")
-  abline(h=0,v=0)
-  points(-width:width, es.good.normal[,1], pch=19,col="red")
-  points(-width:width, es.good.purged[,1],pch=25,col="orange")
-  points(-width:width, es.good.normal[,3], pch=19,col="red")
-  points(-width:width, es.good.purged[,3],pch=25,col="orange")
-    
-  legend("topleft",legend=c("Un-clustered","Clustered"),
-         cex=0.7,pch=c(19,25),
-         col=c("red","orange"),lty=c(1,1),bty="n")
-    
-  # Plot very bad days
-  plot(-width:width, es.bad.normal[,2], type="l", lwd=2,
-       ylim=ylim.max, col="red", xlab=xlab,
-       main = "Very bad days",...)
-  lines(-width:width, es.bad.purged[,2], lwd=2, lty=1,type="l", col="orange")
-  points(-width:width, es.bad.normal[,2], pch=19,col="red")
-  points(-width:width, es.bad.purged[,2], pch=25,col="orange")
-  lines(-width:width, es.bad.normal[,1], lwd=0.8, lty=2, col="red")
-  lines(-width:width, es.bad.normal[,3], lwd=0.8, lty=2, col="red")
-  lines(-width:width, es.bad.purged[,1], lwd=0.8, lty=2, col="orange")
-  lines(-width:width, es.bad.purged[,3], lwd=0.8, lty=2, col="orange")
-  points(-width:width, es.bad.normal[,1], pch=19,col="red")
-  points(-width:width, es.bad.purged[,1], pch=25,col="orange")
-  points(-width:width, es.bad.normal[,3], pch=19,col="red")
-  points(-width:width, es.bad.purged[,3], pch=25,col="orange")
-    
-  abline(h=0,v=0)
-  legend("topleft",legend=c("Un-clustered","Clustered"),
-         cex=0.7,pch=c(19,25),
-         col=c("red","orange"),lty=c(1,1),bty="n")
-}
-
-#--------------------------
-# Suppress the messages
-deprintize<-function(f){
- return(function(...) {capture.output(w<-f(...));return(w);});
-}

Copied: pkg/R/eesInference.R (from rev 324, pkg/R/ees.R)
===================================================================
--- pkg/R/eesInference.R	                        (rev 0)
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/eventstudies -r 325


More information about the Eventstudies-commits mailing list