From noreply at r-forge.r-project.org Thu Feb 7 08:46:07 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 7 Feb 2013 08:46:07 +0100 (CET) Subject: [Eventstudies-commits] r31 - in pkg: R data man Message-ID: <20130207074607.AB98A184618@r-forge.r-project.org> Author: vikram Date: 2013-02-07 08:46:07 +0100 (Thu, 07 Feb 2013) New Revision: 31 Added: pkg/R/identifyExtremeEvents.R pkg/data/IdentifyExevent.rda pkg/man/exact.pattern.location.Rd pkg/man/extreme.events.distribution.Rd pkg/man/gen.data.Rd pkg/man/get.cluster.distribution.Rd pkg/man/get.clusters.formatted.Rd pkg/man/get.event.count.Rd pkg/man/identify.extreme.events.Rd pkg/man/identify.mixedclusters.Rd pkg/man/numbers2words.Rd pkg/man/quantlie.extreme.values.Rd pkg/man/runlength.dist.Rd pkg/man/summarise.cluster.Rd pkg/man/summarise.rle.Rd pkg/man/sumstat.Rd pkg/man/yearly.exevent.dist.Rd pkg/man/yearly.exevent.summary.Rd Log: Added Rd documentation files, data and functions for Identify extreme events Added: pkg/R/identifyExtremeEvents.R =================================================================== --- pkg/R/identifyExtremeEvents.R (rev 0) +++ pkg/R/identifyExtremeEvents.R 2013-02-07 07:46:07 UTC (rev 31) @@ -0,0 +1,737 @@ + +# Total 16 functions +############################ +# Identifying extreme events +############################ +# libraries required +library(xts) +#---------------------------------------------------------------- +# INPUT: +# 'input' : Data series for which extreme events are +# to be identified. More than one series +# is permissble. The 'input' should be in time +# series format. +# 'prob.value': This is the tail value for which event is +# to be defined. For eg: prob.value=5 will +# consider 5% tail on both sides +#----------------------------------------------------------------- +# OUTPUT: +# Result will be in a list of 3 with following tables: +# 1. Summary statistics +# a. Summary of whole data-set +# 2. Lower tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +# 3. Upper tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +#------------------------------------------------------------------ +# NOTE: +identify.extreme.events <- function(input,prob.value){ + no.var <- NCOL(input) + + #------------------------------------------------ + # Breaking the function if any input is not given + #------------------------------------------------ + # For one variable + # If class of data is not time series + class.input <- class(input)%in%c("xts","zoo") + if(class.input==FALSE){ + stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo") + } + + # Converting an xts object to zoo series + input.class <- length(which(class(input)%in%"xts")) + if(length(input.class)==1){ + input <- zoo(input) + } + + #----------------------------------------- + # Event series: Clustered and un-clustered + #----------------------------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),] + clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),] + unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),] + # Left tail data + left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")] + # Right tail data + right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")] + + #--------------------- + # Extreme event output + #--------------------- + # Summary statistics + summ.st <- sumstat(input) + + # Distribtution of events + event.dist <- extreme.events.distribution(input,prob.value) + + # Run length distribution + runlength <- runlength.dist(input,prob.value) + + # Quantile extreme values + qnt.values <- quantile.extreme.values(input,prob.value) + + # Yearly distribution of extreme event dates + yearly.exevent <- yearly.exevent.dist(input,prob.value) + + #--------------------- + # Compiling the output + #--------------------- + output <- lower.tail <- upper.tail <- list() + # Compiling lower tail and upper tail separately + # Lower tail + lower.tail$data <- list(left.all,left.tail.clustered, + left.tail.unclustered) + names(lower.tail$data) <- c("All","Clustered","Un-clustered") + lower.tail$extreme.event.distribution <- event.dist$lower.tail + lower.tail$runlength <- runlength$lower.tail + lower.tail$quantile.values <- qnt.values$lower.tail + lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail + # Upper tail + upper.tail$data <- list(right.all,right.tail.clustered, + right.tail.unclustered) + names(upper.tail$data) <- c("All","Clustered","Un-clustered") + upper.tail$extreme.event.distribution <- event.dist$upper.tail + upper.tail$runlength <- runlength$upper.tail + upper.tail$quantile.values <- qnt.values$upper.tail + upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail + # Output + output$data.summary <- summ.st + output$lower.tail <- lower.tail + output$upper.tail <- upper.tail + return(output) +} + +######################################## +# Functions used for formatting clusters +######################################## +#------------------------ +# Categorzing tail events +# for ES analysis +#------------------------ +# Generates returns for the series +# Mark left tail, right tail events +gen.data <- function(d,probvalue,value="nonreturns"){ + res <- data.frame(dates=index(d),value=coredata(d)) + if(value=="returns"){ + res$returns <- c(NA,coredata(diff(log(d))*100)) + }else{ + res$returns <- d + } + pval <- c(probvalue/100,(1-(probvalue/100))) + pval <- quantile(res$returns,prob=pval,na.rm=TRUE) + res$left.tail <- as.numeric(res$returns < pval[1]) + res$right.tail <- as.numeric(res$returns > pval[2]) + res$both.tails <- res$left.tail + res$right.tail + if(value=="returns"){ + return(res[-1,]) + }else{ + return(res) + } +} + + +#------------------- +# Summarise patterns +summarise.rle <- function(oneseries){ + tp <- rle(oneseries) + tp1 <- data.frame(tp$lengths,tp$values) + tp1 <- subset(tp1,tp1[,2]==1) + summary(tp1[,1]) +} + +# Summarise the pattern of cluster +summarise.cluster <- function(obj){ + rle.both <- summarise.rle(obj$both.tail) + rle.left <- summarise.rle(obj$left.tail) + rle.right <- summarise.rle(obj$right.tail) + rbind(both=rle.both,left=rle.left,right=rle.right) +} + +# Getting location for the length +exact.pattern.location <- function(us,pt,pt.len){ + st <- rle(us) + len <- st$length + loc.cs <- cumsum(st$length) + loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1 + return(loc) +} + +# Identify and mark mixed clusters +identify.mixedclusters <- function(m,j){ + m$remove.mixed <- 0 + rownum <- which(m$pattern==TRUE) + for(i in 1:length(rownum)){ + nextnum <- rownum[i]+j-1 + twonums <- m$returns[c(rownum[i]:nextnum)] > 0 + if(sum(twonums)==j || sum(twonums)==0){ + next + }else{ + m$remove.mixed[c(rownum[i]:nextnum)] <- 5 + } + } + m +} + +#-------------------- +# Formatting clusters +#-------------------- +# This function takes does the following transformation: +#---------------------------------------------------- +# What the function does? +# i. Get extreme events from event.series +# ii. Remove all the mixed clusters +# iii. Get different types cluster +# iv. Further club the clusters for event series and +# corresponding response series to get +# clustered returns +# v. Throw the output in timeseries format +#---------------------------------------------------- +# Input for the function +# event.series = Series in levels or returns on events +# is to be defined +# response.series = Series in levels or returns on which +# response is to be generated +# prob.value = Tail value for defining an event +# event.value = What value is to be studied +# returns or levels +# Similarly for response.value +#---------------------------------------------------- +# Output = Formatted clusters in time series format +#---------------------------------------------------- +get.clusters.formatted <- function(event.series, + response.series, + probvalue=5, + event.value="returns", + response.value="returns"){ + # Getting levels in event format + tmp <- gen.data(event.series, + probvalue=probvalue, + value=event.value) + res.ser <- gen.data(response.series, + probvalue=probvalue, + value=response.value) + # Storing old data points + tmp.old <- tmp + + # Get pattern with maximum length + res <- summarise.cluster(tmp) + max.len <- max(res[,"Max."]) + + #------------------------ + # Removing mixed clusters + #------------------------ + for(i in max.len:2){ + which.pattern <- rep(1,i) + patrn <- exact.pattern.location(tmp$both.tails,1,i) + # If pattern does not exist move to next pattern + if(length(patrn)==0){next} + tmp$pattern <- FALSE + tmp$pattern[patrn] <- TRUE + tmp <- identify.mixedclusters(m=tmp,i) + me <- length(which(tmp$remove.mixed==5)) + + if(me!=0){ + tmp <- tmp[-which(tmp$remove.mixed==5),] + cat("Pattern of:",i,";", + "Disarded event:",me/i,"\n") + } + } + tmp.nc <- tmp + + # Merging event and response series + tmp.es <- xts(tmp[,-1],as.Date(tmp$dates)) + tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates)) + tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")], + all=F) + + # Formatting + if(event.value=="returns"){ + which.value <- event.value + }else{ + which.value <- "value" + } + # Converting to data.frame + temp <- as.data.frame(tmp.m) + temp$dates <- rownames(temp) + n <- temp + # Get pattern with maximum length + res <- summarise.cluster(temp) + max.len <- max(res[,"Max."]) + cat("Maximum length after removing mixed clusters is", + max.len,"\n") + # Marking clusters + n$cluster.pattern <- n$both.tails + for(pt.len in max.len:1){ + mark <- exact.pattern.location(n$both.tails,1,pt.len) + if(length(mark)==0){next} + n$cluster.pattern[mark] <- pt.len + } + + #------------------- + # Clustering returns + #------------------- + print("Clustering events.") + for(pt.len in max.len:2){ + rownum <- exact.pattern.location(n$both.tails,1,pt.len) + # If pattern does not exist + if(length(rownum)==0){ + cat("Pattern",pt.len,"does not exist.","\n");next + } + # Clustering + while(length(rownum)>0){ + prevnum <- rownum[1]-1 + lastnum <- rownum[1]+pt.len-1 + # Clustering event series + if(event.value=="returns"){ + newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum] + n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) + }else{ + newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T) + n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) + } + # Clustering response series + if(response.value=="returns"){ + newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum] + n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns.rs) + }else{ + newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T) + n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns) + } + n <- n[-c((rownum[1]+1):lastnum),] + rownum <- exact.pattern.location(n$both.tails,1,pt.len) + } + } + # Columns to keep + cn <- c(which.value,"left.tail","right.tail", + "returns.1","cluster.pattern") + tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates)) + colnames(tmp.ts) <- c("event.series","left.tail","right.tail", + "response.series","cluster.pattern") + + # Results + return(tmp.ts) +} + +############################## +# Summary statistics functions +############################## +#--------------------------------------------- +# Table 1: Summary statistics +# INPUT: Time series data-set for which +# summary statistics is to be estimated +# OUTPUT: A data frame with: +# - Values: "Minimum", 5%,"25%","Median", +# "Mean","75%","95%","Maximum", +# "Standard deviation","IQR", +# "Observations" +#---------------------------------------------- +sumstat <- function(input){ + no.var <- NCOL(input) + if(no.var==1){input <- xts(input)} + # Creating empty frame: chassis + tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input))) + colnames(tmp) <- colnames(input) + rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%", + "Max","sd","IQR","Obs.") + # Estimating summary statistics + tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)}) + tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)}) + tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)}) + tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)}) + tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)}) + tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)}) + tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)}) + tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)}) + tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)}) + tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)}) + tmp[11,] <- apply(input,2,function(x){NROW(x)}) + tmp <- round(tmp,2) + + return(tmp) +} + +###################### +# Yearly summary stats +###################### +#---------------------------- +# INPUT: +# 'input': Data series for which event cluster distribution +# is to be calculated; +# 'prob.value': Probility value for which tail is to be constructed this +# value is equivalent to one side tail for eg. if prob.value=5 +# then we have values of 5% tail on both sides +# Functions used: yearly.exevent.summary() +# OUTPUT: +# Yearly distribution of extreme events +#---------------------------- +yearly.exevent.dist <- function(input, prob.value){ + no.var <- NCOL(input) + mylist <- list() + # Estimating cluster count + #-------------------- + # Formatting clusters + #-------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + tmp.res <- yearly.exevent.summary(tmp) + tmp.res[is.na(tmp.res)] <- 0 + # Left and right tail + lower.tail.yearly.exevent <- tmp.res[,1:2] + upper.tail.yearly.exevent <- tmp.res[,3:4] + output <- list() + output$lower.tail <- lower.tail.yearly.exevent + output$upper.tail <- upper.tail.yearly.exevent + mylist <- output + + return(mylist) +} + +#------------------------------------------------ +# Get yearly no. and median for good and bad days +#------------------------------------------------ +yearly.exevent.summary <- function(tmp){ + tmp.bad <- tmp[which(tmp[,"left.tail"]==1),] + tmp.good <- tmp[which(tmp[,"right.tail"]==1),] + # Bad days + tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x)) + tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T))) + index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y")) + # Good days + tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x)) + tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T))) + index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y")) + tmp.res <- merge(tmp.bad.y,tmp.good.y) + colnames(tmp.res) <- c("number.baddays","median.baddays", + "number.gooddays","median.goodays") + output <- as.data.frame(tmp.res) + cn <- rownames(output) + rownames(output) <- sapply(rownames(output), + function(x)substr(x,nchar(x)-3,nchar(x))) + return(output) +} + +############################# +# Getting event segregation +# - clustered and unclustered +############################# +#---------------------------- +# INPUT: +# 'input': Data series for which event cluster distribution +# is to be calculated; +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# 'prob.value': Probility value for which tail is to be constructed this +# value is equivalent to one side tail for eg. if prob.value=5 +# then we have values of 5% tail on both sides +# Functions used: get.event.count() +# OUTPUT: +# Distribution of extreme events +#---------------------------- + +extreme.events.distribution <- function(input,prob.value){ + # Creating an empty frame + no.var <- NCOL(input) + lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + colnames(lower.tail.dist) <- c("Unclustered","Used clusters", + "Removed clusters","Total clusters", + "Total","Total used clusters") + rownames(lower.tail.dist) <- colnames(input) + colnames(upper.tail.dist) <- c("Unclustered","Used clusters", + "Removed clusters","Total clusters", + "Total","Total used clusters") + rownames(upper.tail.dist) <- colnames(input) + # Estimating cluster count + #-------------- + # Cluster count + #-------------- + # Non-returns (if it is already in return format) + tmp <- get.event.count(input,probvalue=prob.value, + value="nonreturns") + lower.tail.dist <- tmp[1,] + upper.tail.dist <- tmp[2,] + + #----------------------------- + # Naming the tail distribution + #----------------------------- + mylist <- list(lower.tail.dist,upper.tail.dist) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +# Functions used in event count calculation +get.event.count <- function(series, + probvalue=5, + value="returns"){ + # Extracting dataset + tmp.old <- gen.data(series,probvalue,value) + tmp <- get.clusters.formatted(event.series=series, + response.series=series, + probvalue, + event.value=value, + response.value=value) + + cp <- tmp[,"cluster.pattern"] + lvl <- as.numeric(levels(as.factor(cp))) + lvl.use <- lvl[which(lvl>1)] + # Calculating Total events + tot.ev.l <- length(which(tmp.old[,"left.tail"]==1)) + tot.ev.r <- length(which(tmp.old[,"right.tail"]==1)) + # Calculating Unclustered events + un.clstr.l <- length(which(tmp[,"left.tail"]==1 & + tmp[,"cluster.pattern"]==1)) + un.clstr.r <- length(which(tmp[,"right.tail"]==1 & + tmp[,"cluster.pattern"]==1)) + # Calculating Used clusters + us.cl.l <- us.cl.r <- NULL + for(i in 1:length(lvl.use)){ + tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & + tmp[,"left.tail"]==1))*lvl.use[i] + tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & + tmp[,"right.tail"]==1))*lvl.use[i] + us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE) + us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE) + } + + # Making a table + tb <- data.frame(matrix(NA,2,6)) + colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used") + rownames(tb) <- c("lower","upper") + tb[,"Tot"] <- c(tot.ev.l,tot.ev.r) + tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r) + tb[,"used.clstr"] <- c(us.cl.l,us.cl.r) + tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr + tb[,"tot.clstr"] <- tb$Tot-tb$unclstr + tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr + + return(tb) +} + +#################################### +# Quantile values for extreme events +#################################### +#----------------------------------- +# INPUT: +# 'input': Data series in time series format +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# Functions used: get.clusters.formatted() +# OUTPUT: +# Lower tail and Upper tail quantile values +#----------------------------------- +quantile.extreme.values <- function(input, prob.value){ + # Creating an empty frame + no.var <- NCOL(input) + lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max", + "Mean") + rownames(lower.tail.qnt.value) <- colnames(input) + colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max", + "Mean") + rownames(upper.tail.qnt.value) <- colnames(input) + # Estimating cluster count + #-------------------- + # Formatting clusters + #-------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + # Left tail + tmp.left.tail <- tmp[which(tmp$left.tail==1), + "event.series"] + df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1)))) + tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2) + rownames(tmp.left) <- NULL + colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean") + # Right tail + tmp.right.tail <- tmp[which(tmp$right.tail==1), + "event.series"] + df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1)))) + tmp.right <- round(cbind(df.right, + mean(tmp.right.tail)),2) + rownames(tmp.right) <- NULL + colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean") + + lower.tail.qnt.value <- tmp.left + upper.tail.qnt.value <- tmp.right + + mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +########################## +# Run length distribution +########################## +#----------------------------------- +# INPUT: +# 'input': Data series in time series format +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# Functions used: get.clusters.formatted() +# get.cluster.distribution() +# numbers2words() +# OUTPUT: +# Lower tail and Upper tail Run length distribution +#----------------------------------- +runlength.dist <- function(input, prob.value){ + + # Creating an empty frame + no.var <- NCOL(input) + + # Finding maximum Run length + # Seed value + max.runlength <- 0 + #--------------------------- + # Estimating max. Run length + #--------------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + tmp.runlength <- get.cluster.distribution(tmp,"event.series") + max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)])) + + # Generating empty frame + col.names <- seq(2:max.runlength)+1 + lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var, + ncol=length(col.names))) + upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var, + ncol=length(col.names))) + colnames(lower.tail.runlength) <- col.names + rownames(lower.tail.runlength) <- colnames(input) + colnames(upper.tail.runlength) <- col.names + rownames(upper.tail.runlength) <- colnames(input) + + #---------------------- + # Run length estimation + #---------------------- + tmp.res <- get.cluster.distribution(tmp,"event.series") + for(j in 1:length(colnames(tmp.res))){ + col.number <- colnames(tmp.res)[j] + lower.tail.runlength[1,col.number] <- tmp.res[1,col.number] + upper.tail.runlength[1,col.number] <- tmp.res[2,col.number] + } + + # Replacing NA's with zeroes + lower.tail.runlength[is.na(lower.tail.runlength)] <- 0 + upper.tail.runlength[is.na(upper.tail.runlength)] <- 0 + + # creating column names + word.cn <- NULL + for(i in 1:length(col.names)){ + word.cn[i] <- numbers2words(col.names[i]) + } + colnames(lower.tail.runlength) <- word.cn + colnames(upper.tail.runlength) <- word.cn + mylist <- list(lower.tail.runlength,upper.tail.runlength) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +#------------------------- +# Get cluster distribution +#------------------------- +# Input for this function is the output of get.cluster.formatted +get.cluster.distribution <- function(tmp,variable){ + # Extract cluster category + cp <- tmp[,"cluster.pattern"] + lvl <- as.numeric(levels(as.factor(cp))) + lvl.use <- lvl[which(lvl>1)] + # Get numbers for each category + tb <- data.frame(matrix(NA,2,length(lvl.use))) + colnames(tb) <- as.character(lvl.use) + rownames(tb) <- c(paste(variable,":lower tail"), + paste(variable,":upper tail")) + for(i in 1:length(lvl.use)){ + tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] + & tmp[,"left.tail"]==1)) + tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] + & tmp[,"right.tail"]==1)) + + } + return(tb) +} + +#---------------------------- +# Converting numbers to words +#---------------------------- +numbers2words <- function(x){ + helper <- function(x){ + digits <- rev(strsplit(as.character(x), "")[[1]]) + nDigits <- length(digits) + if (nDigits == 1) as.vector(ones[digits]) + else if (nDigits == 2) + if (x <= 19) as.vector(teens[digits[1]]) + else trim(paste(tens[digits[2]], + Recall(as.numeric(digits[1])))) + else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred", + Recall(makeNumber(digits[2:1])))) + else { + nSuffix <- ((nDigits + 2) %/% 3) - 1 + if (nSuffix > length(suffixes)) stop(paste(x, "is too large!")) + trim(paste(Recall(makeNumber(digits[ + nDigits:(3*nSuffix + 1)])), + suffixes[nSuffix], + Recall(makeNumber(digits[(3*nSuffix):1])))) + } + } + trim <- function(text){ + gsub("^\ ", "", gsub("\ *$", "", text)) + } + makeNumber <- function(...) as.numeric(paste(..., collapse="")) + opts <- options(scipen=100) + on.exit(options(opts)) + ones <- c("", "one", "two", "three", "four", "five", "six", "seven", + "eight", "nine") + names(ones) <- 0:9 + teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", + "sixteen", " seventeen", "eighteen", "nineteen") + names(teens) <- 0:9 + tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy", + "eighty", + "ninety") + names(tens) <- 2:9 + x <- round(x) + suffixes <- c("thousand", "million", "billion", "trillion") + if (length(x) > 1) return(sapply(x, helper)) + helper(x) +} Added: pkg/data/IdentifyExevent.rda =================================================================== (Binary files differ) Property changes on: pkg/data/IdentifyExevent.rda ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Added: pkg/man/exact.pattern.location.Rd =================================================================== --- pkg/man/exact.pattern.location.Rd (rev 0) +++ pkg/man/exact.pattern.location.Rd 2013-02-07 07:46:07 UTC (rev 31) @@ -0,0 +1,39 @@ +\name{exact.pattern.location} +\alias{exact.pattern.location} + +\title{ + Gets the location of the pattern +} + +\description{ + The function gives the exact location of the exact pattern in the series. + } + +\usage{ +exact.pattern.location(us,pt,pt.len) +} + +\arguments{ + \item{us}{It is the series in which the location of the pattern is to + be found} + \item{pt}{It is the pattern which is to be searched in the series for + eg. 1} + \item{pt.len}{It is the length of the pattern which is to be searched + for} +} + +\value{ +Output is the row number of the start of the pattern +} + + +\examples{ +# Loading data +data(IdentifyExevent) +# Series input +input <- diff(log(input.data[,"sp500"])) +# Marking left tail and right tail +res <- gen.data(d=input,probvalue=5,value="nonreturns") +# Getting summarised run length +output <- exact.pattern.location(res$left.tail, pt=1, pt.len=2) +} \ No newline at end of file Added: pkg/man/extreme.events.distribution.Rd =================================================================== --- pkg/man/extreme.events.distribution.Rd (rev 0) +++ pkg/man/extreme.events.distribution.Rd 2013-02-07 07:46:07 UTC (rev 31) @@ -0,0 +1,40 @@ +\name{extreme.events.distribution} +\alias{extreme.events.distribution} + +\title{ + Estimating extreme event distribution of clustered and unclustered data +} + +\description{ + It is the wrapper over the get.event.count function for estimating + extreme event distribution of clustered and unclustered data + + } + +\usage{ +extreme.events.distribution(input, prob.value) +} + +\arguments{ + \item{input}{Series for which cluster distribution is to be checked} + \item{prob.value}{It is tail value for which the extreme event is to + be defined. For eg: prob.value of 5 will consider 5\% tail on both + sides} +} + +\value{ +Output will be the distribution of clustered and unclustered extreme +event of the series +} + +\seealso{ + get.event.count +} + +\examples{ +data(IdentifyExevent) +# Series input +input <- diff(log(input.data[,"sp500"])) +output <- extreme.events.distribution(input,prob.value=5) + +} \ No newline at end of file Added: pkg/man/gen.data.Rd =================================================================== --- pkg/man/gen.data.Rd (rev 0) +++ pkg/man/gen.data.Rd 2013-02-07 07:46:07 UTC (rev 31) @@ -0,0 +1,41 @@ +\name{gen.data} +\alias{gen.data} + +\title{ +Marking upper and lower tail events for extreme event analysis. +} + +\description{ +This function generates a column as left.tail and right.tail which has +binary numbers. If the observation belongs to left tail then the +left.tail will be 1 else 0. + } + +\usage{ +gen.data(d,probvalue,value) +} + +\arguments{ + \item{d}{'d' is the time-series on which extreme event analysis is + done.} + \item{probvalue}{It is tail value for which the extreme event is to + be defined. For eg: prob.value of 5 will consider 5\% tail on both + sides.} + \item{value}{If the series 'd' is in returns format then + value="nonreturns" else value="returns"} +} + +\value{ + Output is a data frame with columns as date, value, returns, left.tail + which is left tail dummy simmilarly for right tail and both tails. + +} + + +\examples{ +data(IdentifyExevent) +# Series input +input <- diff(log(input.data[,"sp500"])) +# Marking left tail and right tail +res <- gen.data(d=input,probvalue=5,value="nonreturns") +} \ No newline at end of file Added: pkg/man/get.cluster.distribution.Rd =================================================================== --- pkg/man/get.cluster.distribution.Rd (rev 0) +++ pkg/man/get.cluster.distribution.Rd 2013-02-07 07:46:07 UTC (rev 31) @@ -0,0 +1,38 @@ +\name{get.cluster.distribution} +\alias{get.cluster.distribution} + +\title{ + Estimating runlength distribution for the clusters +} + +\description{ + Estimating runlength distribution of the clusters in the extreme + event. + } + +\usage{ +get.cluster.distribution(tmp, variable) +} + +\arguments{ + \item{tmp}{It is the output of the get.clusters.formatted} + \item{variable}{Variable on which cluster distribution is to be + estimated; variable="event.series"} +} + +\value{ + Output is the runlength distribution for the clusters +} + +\seealso{ + get.clusters.formatted +} + +\examples{ +data(IdentifyExevent) +# Series input +input <- diff(log(input.data[,"sp500"])) +tmp <- get.clusters.formatted(event.series=input,response.series=input, +probvalue=5,event.value="nonreturns",response.value="nonreturns") [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/eventstudies -r 31 From noreply at r-forge.r-project.org Thu Feb 7 12:43:29 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 7 Feb 2013 12:43:29 +0100 (CET) Subject: [Eventstudies-commits] r32 - in pkg: . R man tests Message-ID: <20130207114330.13E81183C46@r-forge.r-project.org> Author: vikram Date: 2013-02-07 12:43:29 +0100 (Thu, 07 Feb 2013) New Revision: 32 Added: pkg/NAMESPACE pkg/R/identify.extreme.events.R pkg/R/inference.Ecar.R pkg/R/remap.cumprod.R pkg/R/remap.cumsum.R pkg/R/remap.event.reindex.R Removed: pkg/R/identifyExtremeEvents.R pkg/R/inference.R pkg/R/remap_functions.R pkg/man/exact.pattern.location.Rd pkg/man/extreme.events.distribution.Rd pkg/man/gen.data.Rd pkg/man/get.cluster.distribution.Rd pkg/man/get.clusters.formatted.Rd pkg/man/get.event.count.Rd pkg/man/identify.mixedclusters.Rd pkg/man/numbers2words.Rd pkg/man/quantlie.extreme.values.Rd pkg/man/runlength.dist.Rd pkg/man/summarise.cluster.Rd pkg/man/summarise.rle.Rd pkg/man/sumstat.Rd pkg/man/yearly.exevent.dist.Rd pkg/man/yearly.exevent.summary.Rd pkg/tests/data/ Modified: pkg/tests/subbarao.R Log: Added NAMESPACE file; removed functions not used by user and their man files Added: pkg/NAMESPACE =================================================================== --- pkg/NAMESPACE (rev 0) +++ pkg/NAMESPACE 2013-02-07 11:43:29 UTC (rev 32) @@ -0,0 +1,3 @@ +export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identify.extreme.events) +#S3method(identify, extreme.events) + Copied: pkg/R/identify.extreme.events.R (from rev 31, pkg/R/identifyExtremeEvents.R) =================================================================== --- pkg/R/identify.extreme.events.R (rev 0) +++ pkg/R/identify.extreme.events.R 2013-02-07 11:43:29 UTC (rev 32) @@ -0,0 +1,737 @@ + +# Total 16 functions +############################ +# Identifying extreme events +############################ +# libraries required +library(xts) +#---------------------------------------------------------------- +# INPUT: +# 'input' : Data series for which extreme events are +# to be identified. More than one series +# is permissble. The 'input' should be in time +# series format. +# 'prob.value': This is the tail value for which event is +# to be defined. For eg: prob.value=5 will +# consider 5% tail on both sides +#----------------------------------------------------------------- +# OUTPUT: +# Result will be in a list of 3 with following tables: +# 1. Summary statistics +# a. Summary of whole data-set +# 2. Lower tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +# 3. Upper tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +#------------------------------------------------------------------ +# NOTE: +identify.extreme.events <- function(input,prob.value){ + no.var <- NCOL(input) + + #------------------------------------------------ + # Breaking the function if any input is not given + #------------------------------------------------ + # For one variable + # If class of data is not time series + class.input <- class(input)%in%c("xts","zoo") + if(class.input==FALSE){ + stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo") + } + + # Converting an xts object to zoo series + input.class <- length(which(class(input)%in%"xts")) + if(length(input.class)==1){ + input <- zoo(input) + } + + #----------------------------------------- + # Event series: Clustered and un-clustered + #----------------------------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),] + clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),] + unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),] + # Left tail data + left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")] + # Right tail data + right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")] + + #--------------------- + # Extreme event output + #--------------------- + # Summary statistics + summ.st <- sumstat(input) + + # Distribtution of events + event.dist <- extreme.events.distribution(input,prob.value) + + # Run length distribution + runlength <- runlength.dist(input,prob.value) + + # Quantile extreme values + qnt.values <- quantile.extreme.values(input,prob.value) + + # Yearly distribution of extreme event dates + yearly.exevent <- yearly.exevent.dist(input,prob.value) + + #--------------------- + # Compiling the output + #--------------------- + output <- lower.tail <- upper.tail <- list() + # Compiling lower tail and upper tail separately + # Lower tail + lower.tail$data <- list(left.all,left.tail.clustered, + left.tail.unclustered) + names(lower.tail$data) <- c("All","Clustered","Un-clustered") + lower.tail$extreme.event.distribution <- event.dist$lower.tail + lower.tail$runlength <- runlength$lower.tail + lower.tail$quantile.values <- qnt.values$lower.tail + lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail + # Upper tail + upper.tail$data <- list(right.all,right.tail.clustered, + right.tail.unclustered) + names(upper.tail$data) <- c("All","Clustered","Un-clustered") + upper.tail$extreme.event.distribution <- event.dist$upper.tail + upper.tail$runlength <- runlength$upper.tail + upper.tail$quantile.values <- qnt.values$upper.tail + upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail + # Output + output$data.summary <- summ.st + output$lower.tail <- lower.tail + output$upper.tail <- upper.tail + return(output) +} + +######################################## +# Functions used for formatting clusters +######################################## +#------------------------ +# Categorzing tail events +# for ES analysis +#------------------------ +# Generates returns for the series +# Mark left tail, right tail events +gen.data <- function(d,probvalue,value="nonreturns"){ + res <- data.frame(dates=index(d),value=coredata(d)) + if(value=="returns"){ + res$returns <- c(NA,coredata(diff(log(d))*100)) + }else{ + res$returns <- d + } + pval <- c(probvalue/100,(1-(probvalue/100))) + pval <- quantile(res$returns,prob=pval,na.rm=TRUE) + res$left.tail <- as.numeric(res$returns < pval[1]) + res$right.tail <- as.numeric(res$returns > pval[2]) + res$both.tails <- res$left.tail + res$right.tail + if(value=="returns"){ + return(res[-1,]) + }else{ + return(res) + } +} + + +#------------------- +# Summarise patterns +summarise.rle <- function(oneseries){ + tp <- rle(oneseries) + tp1 <- data.frame(tp$lengths,tp$values) + tp1 <- subset(tp1,tp1[,2]==1) + summary(tp1[,1]) +} + +# Summarise the pattern of cluster +summarise.cluster <- function(obj){ + rle.both <- summarise.rle(obj$both.tail) + rle.left <- summarise.rle(obj$left.tail) + rle.right <- summarise.rle(obj$right.tail) + rbind(both=rle.both,left=rle.left,right=rle.right) +} + +# Getting location for the length +exact.pattern.location <- function(us,pt,pt.len){ + st <- rle(us) + len <- st$length + loc.cs <- cumsum(st$length) + loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1 + return(loc) +} + +# Identify and mark mixed clusters +identify.mixedclusters <- function(m,j){ + m$remove.mixed <- 0 + rownum <- which(m$pattern==TRUE) + for(i in 1:length(rownum)){ + nextnum <- rownum[i]+j-1 + twonums <- m$returns[c(rownum[i]:nextnum)] > 0 + if(sum(twonums)==j || sum(twonums)==0){ + next + }else{ + m$remove.mixed[c(rownum[i]:nextnum)] <- 5 + } + } + m +} + +#-------------------- +# Formatting clusters +#-------------------- +# This function takes does the following transformation: +#---------------------------------------------------- +# What the function does? +# i. Get extreme events from event.series +# ii. Remove all the mixed clusters +# iii. Get different types cluster +# iv. Further club the clusters for event series and +# corresponding response series to get +# clustered returns +# v. Throw the output in timeseries format +#---------------------------------------------------- +# Input for the function +# event.series = Series in levels or returns on events +# is to be defined +# response.series = Series in levels or returns on which +# response is to be generated +# prob.value = Tail value for defining an event +# event.value = What value is to be studied +# returns or levels +# Similarly for response.value +#---------------------------------------------------- +# Output = Formatted clusters in time series format +#---------------------------------------------------- +get.clusters.formatted <- function(event.series, + response.series, + probvalue=5, + event.value="returns", + response.value="returns"){ + # Getting levels in event format + tmp <- gen.data(event.series, + probvalue=probvalue, + value=event.value) + res.ser <- gen.data(response.series, + probvalue=probvalue, + value=response.value) + # Storing old data points + tmp.old <- tmp + + # Get pattern with maximum length + res <- summarise.cluster(tmp) + max.len <- max(res[,"Max."]) + + #------------------------ + # Removing mixed clusters + #------------------------ + for(i in max.len:2){ + which.pattern <- rep(1,i) + patrn <- exact.pattern.location(tmp$both.tails,1,i) + # If pattern does not exist move to next pattern + if(length(patrn)==0){next} + tmp$pattern <- FALSE + tmp$pattern[patrn] <- TRUE + tmp <- identify.mixedclusters(m=tmp,i) + me <- length(which(tmp$remove.mixed==5)) + + if(me!=0){ + tmp <- tmp[-which(tmp$remove.mixed==5),] + cat("Pattern of:",i,";", + "Disarded event:",me/i,"\n") + } + } + tmp.nc <- tmp + + # Merging event and response series + tmp.es <- xts(tmp[,-1],as.Date(tmp$dates)) + tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates)) + tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")], + all=F) + + # Formatting + if(event.value=="returns"){ + which.value <- event.value + }else{ + which.value <- "value" + } + # Converting to data.frame + temp <- as.data.frame(tmp.m) + temp$dates <- rownames(temp) + n <- temp + # Get pattern with maximum length + res <- summarise.cluster(temp) + max.len <- max(res[,"Max."]) + cat("Maximum length after removing mixed clusters is", + max.len,"\n") + # Marking clusters + n$cluster.pattern <- n$both.tails + for(pt.len in max.len:1){ + mark <- exact.pattern.location(n$both.tails,1,pt.len) + if(length(mark)==0){next} + n$cluster.pattern[mark] <- pt.len + } + + #------------------- + # Clustering returns + #------------------- + print("Clustering events.") + for(pt.len in max.len:2){ + rownum <- exact.pattern.location(n$both.tails,1,pt.len) + # If pattern does not exist + if(length(rownum)==0){ + cat("Pattern",pt.len,"does not exist.","\n");next + } + # Clustering + while(length(rownum)>0){ + prevnum <- rownum[1]-1 + lastnum <- rownum[1]+pt.len-1 + # Clustering event series + if(event.value=="returns"){ + newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum] + n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) + }else{ + newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T) + n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) + } + # Clustering response series + if(response.value=="returns"){ + newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum] + n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns.rs) + }else{ + newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T) + n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns) + } + n <- n[-c((rownum[1]+1):lastnum),] + rownum <- exact.pattern.location(n$both.tails,1,pt.len) + } + } + # Columns to keep + cn <- c(which.value,"left.tail","right.tail", + "returns.1","cluster.pattern") + tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates)) + colnames(tmp.ts) <- c("event.series","left.tail","right.tail", + "response.series","cluster.pattern") + + # Results + return(tmp.ts) +} + +############################## +# Summary statistics functions +############################## +#--------------------------------------------- +# Table 1: Summary statistics +# INPUT: Time series data-set for which +# summary statistics is to be estimated +# OUTPUT: A data frame with: +# - Values: "Minimum", 5%,"25%","Median", +# "Mean","75%","95%","Maximum", +# "Standard deviation","IQR", +# "Observations" +#---------------------------------------------- +sumstat <- function(input){ + no.var <- NCOL(input) + if(no.var==1){input <- xts(input)} + # Creating empty frame: chassis + tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input))) + colnames(tmp) <- colnames(input) + rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%", + "Max","sd","IQR","Obs.") + # Estimating summary statistics + tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)}) + tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)}) + tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)}) + tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)}) + tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)}) + tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)}) + tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)}) + tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)}) + tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)}) + tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)}) + tmp[11,] <- apply(input,2,function(x){NROW(x)}) + tmp <- round(tmp,2) + + return(tmp) +} + +###################### +# Yearly summary stats +###################### +#---------------------------- +# INPUT: +# 'input': Data series for which event cluster distribution +# is to be calculated; +# 'prob.value': Probility value for which tail is to be constructed this +# value is equivalent to one side tail for eg. if prob.value=5 +# then we have values of 5% tail on both sides +# Functions used: yearly.exevent.summary() +# OUTPUT: +# Yearly distribution of extreme events +#---------------------------- +yearly.exevent.dist <- function(input, prob.value){ + no.var <- NCOL(input) + mylist <- list() + # Estimating cluster count + #-------------------- + # Formatting clusters + #-------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + tmp.res <- yearly.exevent.summary(tmp) + tmp.res[is.na(tmp.res)] <- 0 + # Left and right tail + lower.tail.yearly.exevent <- tmp.res[,1:2] + upper.tail.yearly.exevent <- tmp.res[,3:4] + output <- list() + output$lower.tail <- lower.tail.yearly.exevent + output$upper.tail <- upper.tail.yearly.exevent + mylist <- output + + return(mylist) +} + +#------------------------------------------------ +# Get yearly no. and median for good and bad days +#------------------------------------------------ +yearly.exevent.summary <- function(tmp){ + tmp.bad <- tmp[which(tmp[,"left.tail"]==1),] + tmp.good <- tmp[which(tmp[,"right.tail"]==1),] + # Bad days + tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x)) + tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T))) + index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y")) + # Good days + tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x)) + tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T))) + index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y")) + tmp.res <- merge(tmp.bad.y,tmp.good.y) + colnames(tmp.res) <- c("number.baddays","median.baddays", + "number.gooddays","median.goodays") + output <- as.data.frame(tmp.res) + cn <- rownames(output) + rownames(output) <- sapply(rownames(output), + function(x)substr(x,nchar(x)-3,nchar(x))) + return(output) +} + +############################# +# Getting event segregation +# - clustered and unclustered +############################# +#---------------------------- +# INPUT: +# 'input': Data series for which event cluster distribution +# is to be calculated; +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# 'prob.value': Probility value for which tail is to be constructed this +# value is equivalent to one side tail for eg. if prob.value=5 +# then we have values of 5% tail on both sides +# Functions used: get.event.count() +# OUTPUT: +# Distribution of extreme events +#---------------------------- + +extreme.events.distribution <- function(input,prob.value){ + # Creating an empty frame + no.var <- NCOL(input) + lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + colnames(lower.tail.dist) <- c("Unclustered","Used clusters", + "Removed clusters","Total clusters", + "Total","Total used clusters") + rownames(lower.tail.dist) <- colnames(input) + colnames(upper.tail.dist) <- c("Unclustered","Used clusters", + "Removed clusters","Total clusters", + "Total","Total used clusters") + rownames(upper.tail.dist) <- colnames(input) + # Estimating cluster count + #-------------- + # Cluster count + #-------------- + # Non-returns (if it is already in return format) + tmp <- get.event.count(input,probvalue=prob.value, + value="nonreturns") + lower.tail.dist <- tmp[1,] + upper.tail.dist <- tmp[2,] + + #----------------------------- + # Naming the tail distribution + #----------------------------- + mylist <- list(lower.tail.dist,upper.tail.dist) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +# Functions used in event count calculation +get.event.count <- function(series, + probvalue=5, + value="returns"){ + # Extracting dataset + tmp.old <- gen.data(series,probvalue,value) + tmp <- get.clusters.formatted(event.series=series, + response.series=series, + probvalue, + event.value=value, + response.value=value) + + cp <- tmp[,"cluster.pattern"] + lvl <- as.numeric(levels(as.factor(cp))) + lvl.use <- lvl[which(lvl>1)] + # Calculating Total events + tot.ev.l <- length(which(tmp.old[,"left.tail"]==1)) + tot.ev.r <- length(which(tmp.old[,"right.tail"]==1)) + # Calculating Unclustered events + un.clstr.l <- length(which(tmp[,"left.tail"]==1 & + tmp[,"cluster.pattern"]==1)) + un.clstr.r <- length(which(tmp[,"right.tail"]==1 & + tmp[,"cluster.pattern"]==1)) + # Calculating Used clusters + us.cl.l <- us.cl.r <- NULL + for(i in 1:length(lvl.use)){ + tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & + tmp[,"left.tail"]==1))*lvl.use[i] + tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & + tmp[,"right.tail"]==1))*lvl.use[i] + us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE) + us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE) + } + + # Making a table + tb <- data.frame(matrix(NA,2,6)) + colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used") + rownames(tb) <- c("lower","upper") + tb[,"Tot"] <- c(tot.ev.l,tot.ev.r) + tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r) + tb[,"used.clstr"] <- c(us.cl.l,us.cl.r) + tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr + tb[,"tot.clstr"] <- tb$Tot-tb$unclstr + tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr + + return(tb) +} + +#################################### +# Quantile values for extreme events +#################################### +#----------------------------------- +# INPUT: +# 'input': Data series in time series format +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# Functions used: get.clusters.formatted() +# OUTPUT: +# Lower tail and Upper tail quantile values +#----------------------------------- +quantile.extreme.values <- function(input, prob.value){ + # Creating an empty frame + no.var <- NCOL(input) + lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) + colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max", + "Mean") + rownames(lower.tail.qnt.value) <- colnames(input) + colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max", + "Mean") + rownames(upper.tail.qnt.value) <- colnames(input) + # Estimating cluster count + #-------------------- + # Formatting clusters + #-------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + # Left tail + tmp.left.tail <- tmp[which(tmp$left.tail==1), + "event.series"] + df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1)))) + tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2) + rownames(tmp.left) <- NULL + colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean") + # Right tail + tmp.right.tail <- tmp[which(tmp$right.tail==1), + "event.series"] + df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1)))) + tmp.right <- round(cbind(df.right, + mean(tmp.right.tail)),2) + rownames(tmp.right) <- NULL + colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean") + + lower.tail.qnt.value <- tmp.left + upper.tail.qnt.value <- tmp.right + + mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +########################## +# Run length distribution +########################## +#----------------------------------- +# INPUT: +# 'input': Data series in time series format +# Note: The input series expects the input to be in levels not in returns, +# if the some the inputs are already in return formats one has to +# use the other variable 'already.return.series' +# 'already.return.series': column name is to be given which already has +# return series in the data-set +# Functions used: get.clusters.formatted() +# get.cluster.distribution() +# numbers2words() +# OUTPUT: +# Lower tail and Upper tail Run length distribution +#----------------------------------- +runlength.dist <- function(input, prob.value){ + + # Creating an empty frame + no.var <- NCOL(input) + + # Finding maximum Run length + # Seed value + max.runlength <- 0 + #--------------------------- + # Estimating max. Run length + #--------------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + + tmp.runlength <- get.cluster.distribution(tmp,"event.series") + max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)])) + + # Generating empty frame + col.names <- seq(2:max.runlength)+1 + lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var, + ncol=length(col.names))) + upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var, + ncol=length(col.names))) + colnames(lower.tail.runlength) <- col.names + rownames(lower.tail.runlength) <- colnames(input) + colnames(upper.tail.runlength) <- col.names + rownames(upper.tail.runlength) <- colnames(input) + + #---------------------- + # Run length estimation + #---------------------- + tmp.res <- get.cluster.distribution(tmp,"event.series") + for(j in 1:length(colnames(tmp.res))){ + col.number <- colnames(tmp.res)[j] + lower.tail.runlength[1,col.number] <- tmp.res[1,col.number] + upper.tail.runlength[1,col.number] <- tmp.res[2,col.number] + } + + # Replacing NA's with zeroes + lower.tail.runlength[is.na(lower.tail.runlength)] <- 0 + upper.tail.runlength[is.na(upper.tail.runlength)] <- 0 + + # creating column names + word.cn <- NULL + for(i in 1:length(col.names)){ + word.cn[i] <- numbers2words(col.names[i]) + } + colnames(lower.tail.runlength) <- word.cn + colnames(upper.tail.runlength) <- word.cn + mylist <- list(lower.tail.runlength,upper.tail.runlength) + names(mylist) <- c("lower.tail", "upper.tail") + return(mylist) +} + +#------------------------- +# Get cluster distribution +#------------------------- +# Input for this function is the output of get.cluster.formatted +get.cluster.distribution <- function(tmp,variable){ + # Extract cluster category + cp <- tmp[,"cluster.pattern"] + lvl <- as.numeric(levels(as.factor(cp))) + lvl.use <- lvl[which(lvl>1)] + # Get numbers for each category + tb <- data.frame(matrix(NA,2,length(lvl.use))) + colnames(tb) <- as.character(lvl.use) + rownames(tb) <- c(paste(variable,":lower tail"), + paste(variable,":upper tail")) + for(i in 1:length(lvl.use)){ + tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] + & tmp[,"left.tail"]==1)) + tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] + & tmp[,"right.tail"]==1)) + + } + return(tb) +} + +#---------------------------- +# Converting numbers to words +#---------------------------- +numbers2words <- function(x){ + helper <- function(x){ + digits <- rev(strsplit(as.character(x), "")[[1]]) + nDigits <- length(digits) + if (nDigits == 1) as.vector(ones[digits]) + else if (nDigits == 2) + if (x <= 19) as.vector(teens[digits[1]]) + else trim(paste(tens[digits[2]], + Recall(as.numeric(digits[1])))) + else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred", + Recall(makeNumber(digits[2:1])))) + else { + nSuffix <- ((nDigits + 2) %/% 3) - 1 + if (nSuffix > length(suffixes)) stop(paste(x, "is too large!")) + trim(paste(Recall(makeNumber(digits[ + nDigits:(3*nSuffix + 1)])), + suffixes[nSuffix], + Recall(makeNumber(digits[(3*nSuffix):1])))) + } + } + trim <- function(text){ + gsub("^\ ", "", gsub("\ *$", "", text)) + } + makeNumber <- function(...) as.numeric(paste(..., collapse="")) + opts <- options(scipen=100) + on.exit(options(opts)) + ones <- c("", "one", "two", "three", "four", "five", "six", "seven", + "eight", "nine") + names(ones) <- 0:9 + teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", + "sixteen", " seventeen", "eighteen", "nineteen") + names(teens) <- 0:9 + tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy", + "eighty", + "ninety") + names(tens) <- 2:9 + x <- round(x) + suffixes <- c("thousand", "million", "billion", "trillion") + if (length(x) > 1) return(sapply(x, helper)) + helper(x) +} Deleted: pkg/R/identifyExtremeEvents.R =================================================================== --- pkg/R/identifyExtremeEvents.R 2013-02-07 07:46:07 UTC (rev 31) +++ pkg/R/identifyExtremeEvents.R 2013-02-07 11:43:29 UTC (rev 32) @@ -1,737 +0,0 @@ - -# Total 16 functions -############################ -# Identifying extreme events -############################ -# libraries required -library(xts) -#---------------------------------------------------------------- -# INPUT: -# 'input' : Data series for which extreme events are -# to be identified. More than one series -# is permissble. The 'input' should be in time -# series format. -# 'prob.value': This is the tail value for which event is -# to be defined. For eg: prob.value=5 will -# consider 5% tail on both sides -#----------------------------------------------------------------- -# OUTPUT: -# Result will be in a list of 3 with following tables: -# 1. Summary statistics -# a. Summary of whole data-set -# 2. Lower tail: Extreme event tables -# a. Distribution of extreme events -# b. Run length distribution -# c. Quantile values -# d. Yearly distribution -# e. Extreme event data -# - Clustered, Un-clustered and Both -# 3. Upper tail: Extreme event tables -# a. Distribution of extreme events -# b. Run length distribution -# c. Quantile values -# d. Yearly distribution -# e. Extreme event data -# - Clustered, Un-clustered and Both -#------------------------------------------------------------------ -# NOTE: -identify.extreme.events <- function(input,prob.value){ - no.var <- NCOL(input) - - #------------------------------------------------ - # Breaking the function if any input is not given - #------------------------------------------------ - # For one variable - # If class of data is not time series - class.input <- class(input)%in%c("xts","zoo") - if(class.input==FALSE){ - stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo") - } - - # Converting an xts object to zoo series - input.class <- length(which(class(input)%in%"xts")) - if(length(input.class)==1){ - input <- zoo(input) - } - - #----------------------------------------- - # Event series: Clustered and un-clustered - #----------------------------------------- - tmp <- get.clusters.formatted(event.series=input, - response.series=input, - probvalue=prob.value, - event.value="nonreturns", - response.value="nonreturns") - tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),] - clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),] - unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),] - # Left tail data - left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] - left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] - left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")] - # Right tail data - right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] - right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] - right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")] - - #--------------------- - # Extreme event output - #--------------------- - # Summary statistics - summ.st <- sumstat(input) - - # Distribtution of events - event.dist <- extreme.events.distribution(input,prob.value) - - # Run length distribution - runlength <- runlength.dist(input,prob.value) - - # Quantile extreme values - qnt.values <- quantile.extreme.values(input,prob.value) - - # Yearly distribution of extreme event dates - yearly.exevent <- yearly.exevent.dist(input,prob.value) - - #--------------------- - # Compiling the output - #--------------------- - output <- lower.tail <- upper.tail <- list() - # Compiling lower tail and upper tail separately - # Lower tail - lower.tail$data <- list(left.all,left.tail.clustered, - left.tail.unclustered) - names(lower.tail$data) <- c("All","Clustered","Un-clustered") - lower.tail$extreme.event.distribution <- event.dist$lower.tail - lower.tail$runlength <- runlength$lower.tail - lower.tail$quantile.values <- qnt.values$lower.tail [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/eventstudies -r 32 From noreply at r-forge.r-project.org Thu Feb 7 12:45:30 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 7 Feb 2013 12:45:30 +0100 (CET) Subject: [Eventstudies-commits] r33 - in pkg/tests: . data Message-ID: <20130207114530.C3FC4184846@r-forge.r-project.org> Author: vikram Date: 2013-02-07 12:45:30 +0100 (Thu, 07 Feb 2013) New Revision: 33 Added: pkg/tests/data/ pkg/tests/data/inr.rda Log: Added data for test cases Added: pkg/tests/data/inr.rda =================================================================== (Binary files differ) Property changes on: pkg/tests/data/inr.rda ___________________________________________________________________ Added: svn:mime-type + application/octet-stream From noreply at r-forge.r-project.org Fri Feb 8 11:12:51 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Fri, 8 Feb 2013 11:12:51 +0100 (CET) Subject: [Eventstudies-commits] r34 - in pkg: . data man tests Message-ID: <20130208101251.6FD18184CC5@r-forge.r-project.org> Author: vikram Date: 2013-02-08 11:12:51 +0100 (Fri, 08 Feb 2013) New Revision: 34 Added: pkg/data/input.data.rda pkg/man/input.data.Rd Removed: pkg/data/IdentifyExevent.rda Modified: pkg/NAMESPACE pkg/man/identify.extreme.events.Rd pkg/tests/subbarao.R Log: Made changes in NAMESPACE; added input.data documentation; and some other documentation changes Modified: pkg/NAMESPACE =================================================================== --- pkg/NAMESPACE 2013-02-07 11:45:30 UTC (rev 33) +++ pkg/NAMESPACE 2013-02-08 10:12:51 UTC (rev 34) @@ -1,3 +1,3 @@ export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identify.extreme.events) -#S3method(identify, extreme.events) +S3method(identify, extreme.events) Deleted: pkg/data/IdentifyExevent.rda =================================================================== (Binary files differ) Copied: pkg/data/input.data.rda (from rev 33, pkg/data/IdentifyExevent.rda) =================================================================== (Binary files differ) Modified: pkg/man/identify.extreme.events.Rd =================================================================== --- pkg/man/identify.extreme.events.Rd 2013-02-07 11:45:30 UTC (rev 33) +++ pkg/man/identify.extreme.events.Rd 2013-02-08 10:12:51 UTC (rev 34) @@ -33,17 +33,8 @@ data} } -\seealso{ - get.clusters.formatted - sumstat - extreme.events.distribution - runlength.dist - quantile.extreme.values - yearly.exevent.dist -} - \examples{ -data(IdentifyExevent) +data(input.data) input <- diff(log(input.data[,"sp500"])) output <- identify.extreme.events(input, prob.value=5) } Added: pkg/man/input.data.Rd =================================================================== --- pkg/man/input.data.Rd (rev 0) +++ pkg/man/input.data.Rd 2013-02-08 10:12:51 UTC (rev 34) @@ -0,0 +1,19 @@ +\name{input.data} + +\docType{data} + +\alias{Identify extreme events data} + +\title{Times series data} + +\description{ + The time series data is available for S&P 500, VIX and Net flows of + FII as a percentage of total market capitalisation for India. The + sample range for the daily data is from 2000-02-10 to 2011-07-29. +} + +\usage{input.data} + +\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} + +\keyword{datasets} Modified: pkg/tests/subbarao.R =================================================================== --- pkg/tests/subbarao.R 2013-02-07 11:45:30 UTC (rev 33) +++ pkg/tests/subbarao.R 2013-02-08 10:12:51 UTC (rev 34) @@ -1,5 +1,4 @@ -#library(eventstudies) -#library(xts) +library(eventstudies) data(inr) inr_returns<-diff(log(inr))[-1] From noreply at r-forge.r-project.org Mon Feb 11 13:19:25 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 13:19:25 +0100 (CET) Subject: [Eventstudies-commits] r35 - in pkg: . R inst/doc man Message-ID: <20130211121925.C21FB183EF6@r-forge.r-project.org> Author: vikram Date: 2013-02-11 13:19:25 +0100 (Mon, 11 Feb 2013) New Revision: 35 Added: pkg/R/identifyextremeevents.R pkg/man/identifyextremeevents.Rd Removed: pkg/R/identify.extreme.events.R pkg/man/identify.extreme.events.Rd Modified: pkg/NAMESPACE pkg/inst/doc/eventstudies.Rnw pkg/man/eventstudy-package.Rd Log: Written conceptual framework for identify extreme events and example for the same; still need to resolve couple of errors; Work in progress Modified: pkg/NAMESPACE =================================================================== --- pkg/NAMESPACE 2013-02-08 10:12:51 UTC (rev 34) +++ pkg/NAMESPACE 2013-02-11 12:19:25 UTC (rev 35) @@ -1,3 +1,3 @@ -export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identify.extreme.events) -S3method(identify, extreme.events) +export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identifyextremeevents) + Deleted: pkg/R/identify.extreme.events.R =================================================================== --- pkg/R/identify.extreme.events.R 2013-02-08 10:12:51 UTC (rev 34) +++ pkg/R/identify.extreme.events.R 2013-02-11 12:19:25 UTC (rev 35) @@ -1,737 +0,0 @@ - -# Total 16 functions -############################ -# Identifying extreme events -############################ -# libraries required -library(xts) -#---------------------------------------------------------------- -# INPUT: -# 'input' : Data series for which extreme events are -# to be identified. More than one series -# is permissble. The 'input' should be in time -# series format. -# 'prob.value': This is the tail value for which event is -# to be defined. For eg: prob.value=5 will -# consider 5% tail on both sides -#----------------------------------------------------------------- -# OUTPUT: -# Result will be in a list of 3 with following tables: -# 1. Summary statistics -# a. Summary of whole data-set -# 2. Lower tail: Extreme event tables -# a. Distribution of extreme events -# b. Run length distribution -# c. Quantile values -# d. Yearly distribution -# e. Extreme event data -# - Clustered, Un-clustered and Both -# 3. Upper tail: Extreme event tables -# a. Distribution of extreme events -# b. Run length distribution -# c. Quantile values -# d. Yearly distribution -# e. Extreme event data -# - Clustered, Un-clustered and Both -#------------------------------------------------------------------ -# NOTE: -identify.extreme.events <- function(input,prob.value){ - no.var <- NCOL(input) - - #------------------------------------------------ - # Breaking the function if any input is not given - #------------------------------------------------ - # For one variable - # If class of data is not time series - class.input <- class(input)%in%c("xts","zoo") - if(class.input==FALSE){ - stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo") - } - - # Converting an xts object to zoo series - input.class <- length(which(class(input)%in%"xts")) - if(length(input.class)==1){ - input <- zoo(input) - } - - #----------------------------------------- - # Event series: Clustered and un-clustered - #----------------------------------------- - tmp <- get.clusters.formatted(event.series=input, - response.series=input, - probvalue=prob.value, - event.value="nonreturns", - response.value="nonreturns") - tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),] - clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),] - unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),] - # Left tail data - left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] - left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] - left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")] - # Right tail data - right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] - right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] - right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")] - - #--------------------- - # Extreme event output - #--------------------- - # Summary statistics - summ.st <- sumstat(input) - - # Distribtution of events - event.dist <- extreme.events.distribution(input,prob.value) - - # Run length distribution - runlength <- runlength.dist(input,prob.value) - - # Quantile extreme values - qnt.values <- quantile.extreme.values(input,prob.value) - - # Yearly distribution of extreme event dates - yearly.exevent <- yearly.exevent.dist(input,prob.value) - - #--------------------- - # Compiling the output - #--------------------- - output <- lower.tail <- upper.tail <- list() - # Compiling lower tail and upper tail separately - # Lower tail - lower.tail$data <- list(left.all,left.tail.clustered, - left.tail.unclustered) - names(lower.tail$data) <- c("All","Clustered","Un-clustered") - lower.tail$extreme.event.distribution <- event.dist$lower.tail - lower.tail$runlength <- runlength$lower.tail - lower.tail$quantile.values <- qnt.values$lower.tail - lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail - # Upper tail - upper.tail$data <- list(right.all,right.tail.clustered, - right.tail.unclustered) - names(upper.tail$data) <- c("All","Clustered","Un-clustered") - upper.tail$extreme.event.distribution <- event.dist$upper.tail - upper.tail$runlength <- runlength$upper.tail - upper.tail$quantile.values <- qnt.values$upper.tail - upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail - # Output - output$data.summary <- summ.st - output$lower.tail <- lower.tail - output$upper.tail <- upper.tail - return(output) -} - -######################################## -# Functions used for formatting clusters -######################################## -#------------------------ -# Categorzing tail events -# for ES analysis -#------------------------ -# Generates returns for the series -# Mark left tail, right tail events -gen.data <- function(d,probvalue,value="nonreturns"){ - res <- data.frame(dates=index(d),value=coredata(d)) - if(value=="returns"){ - res$returns <- c(NA,coredata(diff(log(d))*100)) - }else{ - res$returns <- d - } - pval <- c(probvalue/100,(1-(probvalue/100))) - pval <- quantile(res$returns,prob=pval,na.rm=TRUE) - res$left.tail <- as.numeric(res$returns < pval[1]) - res$right.tail <- as.numeric(res$returns > pval[2]) - res$both.tails <- res$left.tail + res$right.tail - if(value=="returns"){ - return(res[-1,]) - }else{ - return(res) - } -} - - -#------------------- -# Summarise patterns -summarise.rle <- function(oneseries){ - tp <- rle(oneseries) - tp1 <- data.frame(tp$lengths,tp$values) - tp1 <- subset(tp1,tp1[,2]==1) - summary(tp1[,1]) -} - -# Summarise the pattern of cluster -summarise.cluster <- function(obj){ - rle.both <- summarise.rle(obj$both.tail) - rle.left <- summarise.rle(obj$left.tail) - rle.right <- summarise.rle(obj$right.tail) - rbind(both=rle.both,left=rle.left,right=rle.right) -} - -# Getting location for the length -exact.pattern.location <- function(us,pt,pt.len){ - st <- rle(us) - len <- st$length - loc.cs <- cumsum(st$length) - loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1 - return(loc) -} - -# Identify and mark mixed clusters -identify.mixedclusters <- function(m,j){ - m$remove.mixed <- 0 - rownum <- which(m$pattern==TRUE) - for(i in 1:length(rownum)){ - nextnum <- rownum[i]+j-1 - twonums <- m$returns[c(rownum[i]:nextnum)] > 0 - if(sum(twonums)==j || sum(twonums)==0){ - next - }else{ - m$remove.mixed[c(rownum[i]:nextnum)] <- 5 - } - } - m -} - -#-------------------- -# Formatting clusters -#-------------------- -# This function takes does the following transformation: -#---------------------------------------------------- -# What the function does? -# i. Get extreme events from event.series -# ii. Remove all the mixed clusters -# iii. Get different types cluster -# iv. Further club the clusters for event series and -# corresponding response series to get -# clustered returns -# v. Throw the output in timeseries format -#---------------------------------------------------- -# Input for the function -# event.series = Series in levels or returns on events -# is to be defined -# response.series = Series in levels or returns on which -# response is to be generated -# prob.value = Tail value for defining an event -# event.value = What value is to be studied -# returns or levels -# Similarly for response.value -#---------------------------------------------------- -# Output = Formatted clusters in time series format -#---------------------------------------------------- -get.clusters.formatted <- function(event.series, - response.series, - probvalue=5, - event.value="returns", - response.value="returns"){ - # Getting levels in event format - tmp <- gen.data(event.series, - probvalue=probvalue, - value=event.value) - res.ser <- gen.data(response.series, - probvalue=probvalue, - value=response.value) - # Storing old data points - tmp.old <- tmp - - # Get pattern with maximum length - res <- summarise.cluster(tmp) - max.len <- max(res[,"Max."]) - - #------------------------ - # Removing mixed clusters - #------------------------ - for(i in max.len:2){ - which.pattern <- rep(1,i) - patrn <- exact.pattern.location(tmp$both.tails,1,i) - # If pattern does not exist move to next pattern - if(length(patrn)==0){next} - tmp$pattern <- FALSE - tmp$pattern[patrn] <- TRUE - tmp <- identify.mixedclusters(m=tmp,i) - me <- length(which(tmp$remove.mixed==5)) - - if(me!=0){ - tmp <- tmp[-which(tmp$remove.mixed==5),] - cat("Pattern of:",i,";", - "Disarded event:",me/i,"\n") - } - } - tmp.nc <- tmp - - # Merging event and response series - tmp.es <- xts(tmp[,-1],as.Date(tmp$dates)) - tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates)) - tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")], - all=F) - - # Formatting - if(event.value=="returns"){ - which.value <- event.value - }else{ - which.value <- "value" - } - # Converting to data.frame - temp <- as.data.frame(tmp.m) - temp$dates <- rownames(temp) - n <- temp - # Get pattern with maximum length - res <- summarise.cluster(temp) - max.len <- max(res[,"Max."]) - cat("Maximum length after removing mixed clusters is", - max.len,"\n") - # Marking clusters - n$cluster.pattern <- n$both.tails - for(pt.len in max.len:1){ - mark <- exact.pattern.location(n$both.tails,1,pt.len) - if(length(mark)==0){next} - n$cluster.pattern[mark] <- pt.len - } - - #------------------- - # Clustering returns - #------------------- - print("Clustering events.") - for(pt.len in max.len:2){ - rownum <- exact.pattern.location(n$both.tails,1,pt.len) - # If pattern does not exist - if(length(rownum)==0){ - cat("Pattern",pt.len,"does not exist.","\n");next - } - # Clustering - while(length(rownum)>0){ - prevnum <- rownum[1]-1 - lastnum <- rownum[1]+pt.len-1 - # Clustering event series - if(event.value=="returns"){ - newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum] - n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) - }else{ - newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T) - n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns) - } - # Clustering response series - if(response.value=="returns"){ - newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum] - n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns.rs) - }else{ - newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T) - n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns) - } - n <- n[-c((rownum[1]+1):lastnum),] - rownum <- exact.pattern.location(n$both.tails,1,pt.len) - } - } - # Columns to keep - cn <- c(which.value,"left.tail","right.tail", - "returns.1","cluster.pattern") - tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates)) - colnames(tmp.ts) <- c("event.series","left.tail","right.tail", - "response.series","cluster.pattern") - - # Results - return(tmp.ts) -} - -############################## -# Summary statistics functions -############################## -#--------------------------------------------- -# Table 1: Summary statistics -# INPUT: Time series data-set for which -# summary statistics is to be estimated -# OUTPUT: A data frame with: -# - Values: "Minimum", 5%,"25%","Median", -# "Mean","75%","95%","Maximum", -# "Standard deviation","IQR", -# "Observations" -#---------------------------------------------- -sumstat <- function(input){ - no.var <- NCOL(input) - if(no.var==1){input <- xts(input)} - # Creating empty frame: chassis - tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input))) - colnames(tmp) <- colnames(input) - rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%", - "Max","sd","IQR","Obs.") - # Estimating summary statistics - tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)}) - tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)}) - tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)}) - tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)}) - tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)}) - tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)}) - tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)}) - tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)}) - tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)}) - tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)}) - tmp[11,] <- apply(input,2,function(x){NROW(x)}) - tmp <- round(tmp,2) - - return(tmp) -} - -###################### -# Yearly summary stats -###################### -#---------------------------- -# INPUT: -# 'input': Data series for which event cluster distribution -# is to be calculated; -# 'prob.value': Probility value for which tail is to be constructed this -# value is equivalent to one side tail for eg. if prob.value=5 -# then we have values of 5% tail on both sides -# Functions used: yearly.exevent.summary() -# OUTPUT: -# Yearly distribution of extreme events -#---------------------------- -yearly.exevent.dist <- function(input, prob.value){ - no.var <- NCOL(input) - mylist <- list() - # Estimating cluster count - #-------------------- - # Formatting clusters - #-------------------- - tmp <- get.clusters.formatted(event.series=input, - response.series=input, - probvalue=prob.value, - event.value="nonreturns", - response.value="nonreturns") - - tmp.res <- yearly.exevent.summary(tmp) - tmp.res[is.na(tmp.res)] <- 0 - # Left and right tail - lower.tail.yearly.exevent <- tmp.res[,1:2] - upper.tail.yearly.exevent <- tmp.res[,3:4] - output <- list() - output$lower.tail <- lower.tail.yearly.exevent - output$upper.tail <- upper.tail.yearly.exevent - mylist <- output - - return(mylist) -} - -#------------------------------------------------ -# Get yearly no. and median for good and bad days -#------------------------------------------------ -yearly.exevent.summary <- function(tmp){ - tmp.bad <- tmp[which(tmp[,"left.tail"]==1),] - tmp.good <- tmp[which(tmp[,"right.tail"]==1),] - # Bad days - tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x)) - tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T))) - index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y")) - # Good days - tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x)) - tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T))) - index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y")) - tmp.res <- merge(tmp.bad.y,tmp.good.y) - colnames(tmp.res) <- c("number.baddays","median.baddays", - "number.gooddays","median.goodays") - output <- as.data.frame(tmp.res) - cn <- rownames(output) - rownames(output) <- sapply(rownames(output), - function(x)substr(x,nchar(x)-3,nchar(x))) - return(output) -} - -############################# -# Getting event segregation -# - clustered and unclustered -############################# -#---------------------------- -# INPUT: -# 'input': Data series for which event cluster distribution -# is to be calculated; -# Note: The input series expects the input to be in levels not in returns, -# if the some the inputs are already in return formats one has to -# use the other variable 'already.return.series' -# 'already.return.series': column name is to be given which already has -# return series in the data-set -# 'prob.value': Probility value for which tail is to be constructed this -# value is equivalent to one side tail for eg. if prob.value=5 -# then we have values of 5% tail on both sides -# Functions used: get.event.count() -# OUTPUT: -# Distribution of extreme events -#---------------------------- - -extreme.events.distribution <- function(input,prob.value){ - # Creating an empty frame - no.var <- NCOL(input) - lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) - upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6)) - colnames(lower.tail.dist) <- c("Unclustered","Used clusters", - "Removed clusters","Total clusters", - "Total","Total used clusters") - rownames(lower.tail.dist) <- colnames(input) - colnames(upper.tail.dist) <- c("Unclustered","Used clusters", - "Removed clusters","Total clusters", - "Total","Total used clusters") - rownames(upper.tail.dist) <- colnames(input) - # Estimating cluster count - #-------------- - # Cluster count - #-------------- - # Non-returns (if it is already in return format) - tmp <- get.event.count(input,probvalue=prob.value, - value="nonreturns") - lower.tail.dist <- tmp[1,] - upper.tail.dist <- tmp[2,] - - #----------------------------- - # Naming the tail distribution - #----------------------------- - mylist <- list(lower.tail.dist,upper.tail.dist) - names(mylist) <- c("lower.tail", "upper.tail") - return(mylist) -} - -# Functions used in event count calculation -get.event.count <- function(series, - probvalue=5, - value="returns"){ - # Extracting dataset - tmp.old <- gen.data(series,probvalue,value) - tmp <- get.clusters.formatted(event.series=series, - response.series=series, - probvalue, - event.value=value, - response.value=value) - - cp <- tmp[,"cluster.pattern"] - lvl <- as.numeric(levels(as.factor(cp))) - lvl.use <- lvl[which(lvl>1)] - # Calculating Total events - tot.ev.l <- length(which(tmp.old[,"left.tail"]==1)) - tot.ev.r <- length(which(tmp.old[,"right.tail"]==1)) - # Calculating Unclustered events - un.clstr.l <- length(which(tmp[,"left.tail"]==1 & - tmp[,"cluster.pattern"]==1)) - un.clstr.r <- length(which(tmp[,"right.tail"]==1 & - tmp[,"cluster.pattern"]==1)) - # Calculating Used clusters - us.cl.l <- us.cl.r <- NULL - for(i in 1:length(lvl.use)){ - tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & - tmp[,"left.tail"]==1))*lvl.use[i] - tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] & - tmp[,"right.tail"]==1))*lvl.use[i] - us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE) - us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE) - } - - # Making a table - tb <- data.frame(matrix(NA,2,6)) - colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used") - rownames(tb) <- c("lower","upper") - tb[,"Tot"] <- c(tot.ev.l,tot.ev.r) - tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r) - tb[,"used.clstr"] <- c(us.cl.l,us.cl.r) - tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr - tb[,"tot.clstr"] <- tb$Tot-tb$unclstr - tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr - - return(tb) -} - -#################################### -# Quantile values for extreme events -#################################### -#----------------------------------- -# INPUT: -# 'input': Data series in time series format -# Note: The input series expects the input to be in levels not in returns, -# if the some the inputs are already in return formats one has to -# use the other variable 'already.return.series' -# 'already.return.series': column name is to be given which already has -# return series in the data-set -# Functions used: get.clusters.formatted() -# OUTPUT: -# Lower tail and Upper tail quantile values -#----------------------------------- -quantile.extreme.values <- function(input, prob.value){ - # Creating an empty frame - no.var <- NCOL(input) - lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) - upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) - colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max", - "Mean") - rownames(lower.tail.qnt.value) <- colnames(input) - colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max", - "Mean") - rownames(upper.tail.qnt.value) <- colnames(input) - # Estimating cluster count - #-------------------- - # Formatting clusters - #-------------------- - tmp <- get.clusters.formatted(event.series=input, - response.series=input, - probvalue=prob.value, - event.value="nonreturns", - response.value="nonreturns") - - # Left tail - tmp.left.tail <- tmp[which(tmp$left.tail==1), - "event.series"] - df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1)))) - tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2) - rownames(tmp.left) <- NULL - colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean") - # Right tail - tmp.right.tail <- tmp[which(tmp$right.tail==1), - "event.series"] - df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1)))) - tmp.right <- round(cbind(df.right, - mean(tmp.right.tail)),2) - rownames(tmp.right) <- NULL - colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean") - - lower.tail.qnt.value <- tmp.left - upper.tail.qnt.value <- tmp.right - - mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value) - names(mylist) <- c("lower.tail", "upper.tail") - return(mylist) -} - -########################## -# Run length distribution -########################## -#----------------------------------- -# INPUT: -# 'input': Data series in time series format -# Note: The input series expects the input to be in levels not in returns, -# if the some the inputs are already in return formats one has to -# use the other variable 'already.return.series' -# 'already.return.series': column name is to be given which already has -# return series in the data-set -# Functions used: get.clusters.formatted() -# get.cluster.distribution() -# numbers2words() -# OUTPUT: -# Lower tail and Upper tail Run length distribution -#----------------------------------- -runlength.dist <- function(input, prob.value){ - - # Creating an empty frame - no.var <- NCOL(input) - - # Finding maximum Run length - # Seed value - max.runlength <- 0 - #--------------------------- - # Estimating max. Run length - #--------------------------- - tmp <- get.clusters.formatted(event.series=input, - response.series=input, - probvalue=prob.value, - event.value="nonreturns", - response.value="nonreturns") - - tmp.runlength <- get.cluster.distribution(tmp,"event.series") - max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)])) - - # Generating empty frame - col.names <- seq(2:max.runlength)+1 - lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var, - ncol=length(col.names))) - upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var, - ncol=length(col.names))) - colnames(lower.tail.runlength) <- col.names - rownames(lower.tail.runlength) <- colnames(input) - colnames(upper.tail.runlength) <- col.names - rownames(upper.tail.runlength) <- colnames(input) - - #---------------------- - # Run length estimation - #---------------------- - tmp.res <- get.cluster.distribution(tmp,"event.series") - for(j in 1:length(colnames(tmp.res))){ - col.number <- colnames(tmp.res)[j] - lower.tail.runlength[1,col.number] <- tmp.res[1,col.number] - upper.tail.runlength[1,col.number] <- tmp.res[2,col.number] - } - - # Replacing NA's with zeroes - lower.tail.runlength[is.na(lower.tail.runlength)] <- 0 - upper.tail.runlength[is.na(upper.tail.runlength)] <- 0 - - # creating column names - word.cn <- NULL - for(i in 1:length(col.names)){ - word.cn[i] <- numbers2words(col.names[i]) - } - colnames(lower.tail.runlength) <- word.cn - colnames(upper.tail.runlength) <- word.cn - mylist <- list(lower.tail.runlength,upper.tail.runlength) - names(mylist) <- c("lower.tail", "upper.tail") - return(mylist) -} - -#------------------------- -# Get cluster distribution -#------------------------- -# Input for this function is the output of get.cluster.formatted -get.cluster.distribution <- function(tmp,variable){ - # Extract cluster category - cp <- tmp[,"cluster.pattern"] - lvl <- as.numeric(levels(as.factor(cp))) - lvl.use <- lvl[which(lvl>1)] - # Get numbers for each category - tb <- data.frame(matrix(NA,2,length(lvl.use))) - colnames(tb) <- as.character(lvl.use) - rownames(tb) <- c(paste(variable,":lower tail"), - paste(variable,":upper tail")) - for(i in 1:length(lvl.use)){ - tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] - & tmp[,"left.tail"]==1)) - tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] - & tmp[,"right.tail"]==1)) - - } - return(tb) -} - -#---------------------------- -# Converting numbers to words -#---------------------------- -numbers2words <- function(x){ - helper <- function(x){ - digits <- rev(strsplit(as.character(x), "")[[1]]) - nDigits <- length(digits) - if (nDigits == 1) as.vector(ones[digits]) - else if (nDigits == 2) - if (x <= 19) as.vector(teens[digits[1]]) - else trim(paste(tens[digits[2]], - Recall(as.numeric(digits[1])))) - else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred", - Recall(makeNumber(digits[2:1])))) - else { - nSuffix <- ((nDigits + 2) %/% 3) - 1 - if (nSuffix > length(suffixes)) stop(paste(x, "is too large!")) - trim(paste(Recall(makeNumber(digits[ - nDigits:(3*nSuffix + 1)])), - suffixes[nSuffix], - Recall(makeNumber(digits[(3*nSuffix):1])))) - } - } - trim <- function(text){ - gsub("^\ ", "", gsub("\ *$", "", text)) - } - makeNumber <- function(...) as.numeric(paste(..., collapse="")) - opts <- options(scipen=100) - on.exit(options(opts)) - ones <- c("", "one", "two", "three", "four", "five", "six", "seven", - "eight", "nine") - names(ones) <- 0:9 - teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", - "sixteen", " seventeen", "eighteen", "nineteen") - names(teens) <- 0:9 - tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy", - "eighty", - "ninety") - names(tens) <- 2:9 - x <- round(x) - suffixes <- c("thousand", "million", "billion", "trillion") - if (length(x) > 1) return(sapply(x, helper)) - helper(x) -} Copied: pkg/R/identifyextremeevents.R (from rev 33, pkg/R/identify.extreme.events.R) =================================================================== --- pkg/R/identifyextremeevents.R (rev 0) +++ pkg/R/identifyextremeevents.R 2013-02-11 12:19:25 UTC (rev 35) @@ -0,0 +1,737 @@ + +# Total 16 functions +############################ +# Identifying extreme events +############################ +# libraries required +library(xts) +#---------------------------------------------------------------- +# INPUT: +# 'input' : Data series for which extreme events are +# to be identified. More than one series +# is permissble. The 'input' should be in time +# series format. +# 'prob.value': This is the tail value for which event is +# to be defined. For eg: prob.value=5 will +# consider 5% tail on both sides +#----------------------------------------------------------------- +# OUTPUT: +# Result will be in a list of 3 with following tables: +# 1. Summary statistics +# a. Summary of whole data-set +# 2. Lower tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +# 3. Upper tail: Extreme event tables +# a. Distribution of extreme events +# b. Run length distribution +# c. Quantile values +# d. Yearly distribution +# e. Extreme event data +# - Clustered, Un-clustered and Both +#------------------------------------------------------------------ +# NOTE: +identifyextremeevents <- function(input,prob.value){ + no.var <- NCOL(input) + + #------------------------------------------------ + # Breaking the function if any input is not given + #------------------------------------------------ + # For one variable + # If class of data is not time series + class.input <- class(input)%in%c("xts","zoo") + if(class.input==FALSE){ + stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo") + } + + # Converting an xts object to zoo series + input.class <- length(which(class(input)%in%"xts")) + if(length(input.class)==1){ + input <- zoo(input) + } + + #----------------------------------------- + # Event series: Clustered and un-clustered + #----------------------------------------- + tmp <- get.clusters.formatted(event.series=input, + response.series=input, + probvalue=prob.value, + event.value="nonreturns", + response.value="nonreturns") + tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),] + clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),] + unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),] + # Left tail data + left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")] + left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")] + # Right tail data + right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")] + right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")] + + #--------------------- + # Extreme event output + #--------------------- + # Summary statistics + summ.st <- sumstat(input) + + # Distribtution of events + event.dist <- extreme.events.distribution(input,prob.value) + + # Run length distribution + runlength <- runlength.dist(input,prob.value) + + # Quantile extreme values + qnt.values <- quantile.extreme.values(input,prob.value) + + # Yearly distribution of extreme event dates + yearly.exevent <- yearly.exevent.dist(input,prob.value) + + #--------------------- + # Compiling the output + #--------------------- + output <- lower.tail <- upper.tail <- list() + # Compiling lower tail and upper tail separately + # Lower tail + lower.tail$data <- list(left.all,left.tail.clustered, + left.tail.unclustered) + names(lower.tail$data) <- c("All","Clustered","Un-clustered") + lower.tail$extreme.event.distribution <- event.dist$lower.tail [TRUNCATED] To get the complete diff run: svnlook diff /svnroot/eventstudies -r 35 From noreply at r-forge.r-project.org Mon Feb 11 14:00:26 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 14:00:26 +0100 (CET) Subject: [Eventstudies-commits] r36 - pkg Message-ID: <20130211130026.EA748184C73@r-forge.r-project.org> Author: vikram Date: 2013-02-11 14:00:26 +0100 (Mon, 11 Feb 2013) New Revision: 36 Modified: pkg/DESCRIPTION Log: Small modification in DESCRIPTION file Modified: pkg/DESCRIPTION =================================================================== --- pkg/DESCRIPTION 2013-02-11 12:19:25 UTC (rev 35) +++ pkg/DESCRIPTION 2013-02-11 13:00:26 UTC (rev 36) @@ -1,12 +1,12 @@ Package: eventstudies Type: Package -Title: Event study analysis -Version: 0.03 +Title: Event study and extreme event analysis +Version: 0.04 Date: 2011-06-20 -Author: Ajay Shah, Vimal Balasubramaniam +Author: Ajay Shah, Vimal Balasubramaniam, Vikram Bahure Maintainer: Vimal Balasubramaniam Depends: R (>= 2.12.0), zoo, xts, boot Description: Implementation of short and long term event study methodology License: GPL-2 LazyLoad: yes -Packaged: 2011-07-13 10:33:06 UTC; t136 +Packaged: 2013-02-14 10:33:06 UTC; t136 From noreply at r-forge.r-project.org Mon Feb 11 18:04:48 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 18:04:48 +0100 (CET) Subject: [Eventstudies-commits] r37 - in pkg: data man Message-ID: <20130211170448.6E7F9183F3C@r-forge.r-project.org> Author: vikram Date: 2013-02-11 18:04:48 +0100 (Mon, 11 Feb 2013) New Revision: 37 Added: pkg/data/inputdata.rda pkg/man/inputdata.Rd Removed: pkg/data/input.data.rda pkg/man/input.data.Rd Modified: pkg/man/identifyextremeevents.Rd Log: Changing the name of the dataset as it cannot have dot in the name and renaming it Deleted: pkg/data/input.data.rda =================================================================== (Binary files differ) Copied: pkg/data/inputdata.rda (from rev 36, pkg/data/input.data.rda) =================================================================== (Binary files differ) Modified: pkg/man/identifyextremeevents.Rd =================================================================== --- pkg/man/identifyextremeevents.Rd 2013-02-11 13:00:26 UTC (rev 36) +++ pkg/man/identifyextremeevents.Rd 2013-02-11 17:04:48 UTC (rev 37) @@ -34,7 +34,7 @@ } \examples{ -data(input.data) +data(inputdata) input <- diff(log(input.data[,"sp500"])) output <- identifyextremeevents(input, prob.value=5) } Deleted: pkg/man/input.data.Rd =================================================================== --- pkg/man/input.data.Rd 2013-02-11 13:00:26 UTC (rev 36) +++ pkg/man/input.data.Rd 2013-02-11 17:04:48 UTC (rev 37) @@ -1,19 +0,0 @@ -\name{input.data} - -\docType{data} - -\alias{Identify extreme events data} - -\title{Times series data} - -\description{ - The time series data is available for S&P 500, VIX and Net flows of - FII as a percentage of total market capitalisation for India. The - sample range for the daily data is from 2000-02-10 to 2011-07-29. -} - -\usage{input.data} - -\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} - -\keyword{datasets} Copied: pkg/man/inputdata.Rd (from rev 36, pkg/man/input.data.Rd) =================================================================== --- pkg/man/inputdata.Rd (rev 0) +++ pkg/man/inputdata.Rd 2013-02-11 17:04:48 UTC (rev 37) @@ -0,0 +1,19 @@ +\name{inputdata} + +\docType{data} + +\alias{Identify extreme events data} + +\title{Times series data} + +\description{ + The time series data is available for S&P 500, VIX and Net flows of + FII as a percentage of total market capitalisation for India. The + sample range for the daily data is from 2000-02-10 to 2011-07-29. +} + +\usage{inputdata} + +\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} + +\keyword{datasets} From noreply at r-forge.r-project.org Mon Feb 11 18:07:18 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 18:07:18 +0100 (CET) Subject: [Eventstudies-commits] r38 - in pkg: data man Message-ID: <20130211170718.ECDD2183F3C@r-forge.r-project.org> Author: vikram Date: 2013-02-11 18:07:18 +0100 (Mon, 11 Feb 2013) New Revision: 38 Added: pkg/data/inputdata.rda Removed: pkg/data/inputdata.rda Modified: pkg/man/identifyextremeevents.Rd Log: replaced input data Deleted: pkg/data/inputdata.rda =================================================================== (Binary files differ) Added: pkg/data/inputdata.rda =================================================================== (Binary files differ) Property changes on: pkg/data/inputdata.rda ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: pkg/man/identifyextremeevents.Rd =================================================================== --- pkg/man/identifyextremeevents.Rd 2013-02-11 17:04:48 UTC (rev 37) +++ pkg/man/identifyextremeevents.Rd 2013-02-11 17:07:18 UTC (rev 38) @@ -35,6 +35,6 @@ \examples{ data(inputdata) -input <- diff(log(input.data[,"sp500"])) +input <- diff(log(inputdata[,"sp500"])) output <- identifyextremeevents(input, prob.value=5) } From noreply at r-forge.r-project.org Mon Feb 11 18:15:42 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 18:15:42 +0100 (CET) Subject: [Eventstudies-commits] r39 - in pkg: . inst/doc vignettes Message-ID: <20130211171542.1411C183F3C@r-forge.r-project.org> Author: chiraganand Date: 2013-02-11 18:15:41 +0100 (Mon, 11 Feb 2013) New Revision: 39 Added: pkg/vignettes/ pkg/vignettes/eventstudies.Rnw Removed: pkg/inst/doc/eventstudies.Rnw Log: Moved package vignette to vignettes directory as inst/doc is now deprecated. Deleted: pkg/inst/doc/eventstudies.Rnw =================================================================== --- pkg/inst/doc/eventstudies.Rnw 2013-02-11 17:07:18 UTC (rev 38) +++ pkg/inst/doc/eventstudies.Rnw 2013-02-11 17:15:41 UTC (rev 39) @@ -1,198 +0,0 @@ -\documentclass[a4paper,11pt]{article} -\usepackage{graphicx} -\usepackage{a4wide} -\usepackage[colorlinks,linkcolor=blue,citecolor=red]{hyperref} -\usepackage{natbib} -\usepackage{float} -\title{Introduction to the \textbf{eventstudies} package in R} -\author{} -\begin{document} -%\VignetteIndexEntry{eventstudies: A package with functionality to do Event Studies} -%\VignetteDepends{} -%\VignetteKeywords{event studies} -%\VignettePackage{eventstudies} - -\maketitle -\newpage -\SweaveOpts{engine=R,pdf=TRUE} -\section{Introduction} -This is an introduction to eventstudies, a package in R which has functionality to convert a given dataset into -an event-time frame and to undertake further parametric/non-parametric analysis using various inference procedures. - -This paper describes how this package is used and provides several examples illustrating the use of -the functionality within this package. - -\section{phys2eventtime} -phys2eventtime is a function which takes a zoo object containing input data, a data frame containing -the date of occurance of the events and creates a data frame which is indexed according to the event -time. - -The following illustrates the use of phys2eventtime. -<<>>= -options(useFancyQuotes=FALSE) -library(eventstudies) -input.zoo.object <- structure(c(33.16, 34.0967, 35.3683, 34.46, 34.17, 35.89, 36.19, - 37.1317, 36.7033, 37.7933, 37.8533, 285.325, 292.6, - 290.025, 286.2, 290.075, 295.05, 289.325, 285.625, - 293.7, 298.5, 289.05, 704.5438, 708.35, 735.8375, - 710.625, 711.65, 731.0125, 727.575, 715.0187, 724.2, - 713.1875, 695.1812), .Dim = c(11L, 3L), .Dimnames = - list( NULL, c("ITC", "Reliance", "Infosys")), index = - structure(c(12418, 12419, 12422, 12423, 12424, 12425, - 12426, 12429, 12430, 12431, 12432), class = "Date"), - class = "zoo") -input.zoo.object -eventslist <- data.frame(unit=c("ITC","Reliance","Infosys", - "ITC","Reliance","Junk"), - when=as.Date(c( - "2004-01-02", "2004-01-08", "2004-01-14", - "2005-01-15", "2004-01-01", "2005-01-01"))) - -eventslist$unit <- as.character(eventslist$unit) -eventslist -@ -In this example we note the following about input.zoo.object and the data frame eventslist:- -\begin{enumerate} -\item Prior to event date(in eventslist\$when) there is only 1 reading in the corresponding unit in input.zoo.object. -\item The event date is within the range of available dates for the corresponding unit(this is the ideal case). -\item After the event date there's only 1 reading. -\item The date is not within the range. -\item There is no data prior to this date. -\item Unit does not exist in input.zoo.object. -\end{enumerate} - -This is exactly what the second component of phys2eventtime namely outcomes,reports. The first component of the result of phys2eventtime -is a zoo object which is the event time data frame. -<<>>== -a <- phys2eventtime(input.zoo.object, eventslist,width=0) -str(a) -a$z.e -a$outcomes - -@ -phys2eventtime has a third parameter namely width which allows for checking that no more than 4 consecutive missing observations -are there within the given width from the event time. - -What we expect if we don't use width handling:- -<<>>== -rawres <- structure(list(z.e = structure(c(NA, NA, NA, NA, NA, NA, - NA, NA, 33.16, 34.0967, 35.3683, 34.46, 34.17, 35.89, 36.19, - 37.1317, 36.7033, 37.7933, 37.8533, NA, NA, NA, NA, 285.325, 292.6, - 290.025, 286.2, 290.075, 295.05, 289.325, 285.625, 293.7, 298.5, - 289.05, NA, NA, NA, NA, 704.5438, 708.35, 735.8375, 710.625, 711.65, - 731.0125, 727.575, 715.0187, 724.2, 713.1875, 695.1812, NA, NA, NA, - NA, NA, NA, NA, NA), .Dim = c(19L, 3L), .Dimnames = list( NULL, - c("1", "2", "3")), index = -9:9, class = "zoo"), outcomes = - structure(c(1L, 1L, 1L, 3L, 3L, 2L), .Label = c("success", - "unitmissing", "wrongspan" ), class = "factor")), .Names = c("z.e", - "outcomes")) -rawres -@ -Check without the width handling -- -<<>>== -a <- phys2eventtime(input.zoo.object, eventslist,width=0) -a -all.equal(a, rawres) -@ -Check with width of 1 -- -<<>>== -a <- phys2eventtime(input.zoo.object, eventslist,width=1) -a -all.equal(a, rawres) -@ -But when we go to width=2, column 1 and 3 drop off because they have -only 1 obs before and after the event date respectively. - -<<>>== -a <- phys2eventtime(input.zoo.object, eventslist,width=2) -a -all.equal(a, structure(list(z.e = structure(c(NA, NA, NA, NA, 285.325, - 292.6, 290.025, 286.2, 290.075, 295.05, - 289.325, 285.625, 293.7, 298.5, 289.05, - NA, NA, NA, NA), index = -9:9, class = - "zoo"), outcomes = structure(c(3L, 1L, - 3L, 4L, 4L, 2L), .Label = c("success", - "unitmissing", "wdatamissing", - "wrongspan"), class = "factor")), .Names - = c("z.e", "outcomes" ))) -@ -\section{inference.Ecar} -Once we have an event time frame returned by phys2eventtime we may use inference.Ecar to do -bootstrap inference for the main graph of the event study. This is illustrated in the following example. -<<>>== -library(xts) -load(paste(system.file(package="eventstudies"),"data","inr.rda",sep="/")) -inr.returns<-diff(log(inr))[-1] -eventslist<-data.frame(unit=rep("inr",10), - when=as.Date(c( - "2010-04-20","2010-07-02","2010-07-27", - "2010-09-16","2010-11-02","2011-01-25", - "2011-03-17","2011-05-03","2011-06-16", - "2011-07-26"))) -event.time.data<-phys2eventtime(inr.returns,eventslist,width=10) -w<-window(event.time.data$z.e,start=-10,end=10) -inference.Ecar(w) -@ - -\section{identifyextremeevents} -% Conceptual framework -This package does eventstudy analysis but if the eventstudy analysis -is done on extreme events then it is very essential to understand the -basic pattern and distribution of the extreme events. Here, we define -extreme events as the upper or lower tail values which are estimated -with certain probability value. - -There are two further issues to consider. First, matters are -complicated by the fact that extreme (tail) values may cluster: for -example, there may be two or three consecutive days of very high or -very low daily returns, or these extremes may occur in two out of -three days. If the extreme values are all in the same tail of the -distribution, it might make sense to consider the cluster of extreme -values as a single event. - -We approach this problem through two paths. The data has following -events: clustered, un-clustered and mixed clusters. For simplicity, we -remove all the mixed clusters and deal with the rest. Un-clustered or -uncontaminated events are those where there is no other event within -the event window. Clustered events are defined by fusing all -consecutive extreme events, of the same direction, into a single -event. In event time, date +1 is then the first day after the run of -extreme events, and date -1 is the last day prior to the start of the -run. This strategy avoids losing observations of some of the most -important crises, which have clustered extreme events in the same -direction. - -This function gives following output: -\begin{itemize} -\item Summary statistics for complete data-set -\item Extreme event analysis for lower and upper tail i.e. bad-days - and good-days - \begin{itemize} - \item Dataset: Extreme events - \begin{itemize} - \item All extreme events - \item Clustered extreme events - \item Un-clustered extreme events - \end{itemize} - \item Distribution of clustered and un-clustered data: This measures - the number of cluster and un-cluster extreme events in the - data-set. Here, cluster which are not used are mixed clusters. - \item Run-length distribution of clusters: If there is a cluster - then run-length measures the length of consecutive extreme events - in a cluster. - \item Quantile values of extreme events: - \item Yearly distribution of extreme events: Here we have year-wise - distribution of extreme events and their median values. - \end{itemize} -\end{itemize} - -% Example for understanding -<<>>== -library(xts) -data(input.data) -input <- diff(log(input.data[,"sp500"])) -output <- identifyextremeevents(input, prob.value=5) -output -@ - -\end{document} Copied: pkg/vignettes/eventstudies.Rnw (from rev 36, pkg/inst/doc/eventstudies.Rnw) =================================================================== --- pkg/vignettes/eventstudies.Rnw (rev 0) +++ pkg/vignettes/eventstudies.Rnw 2013-02-11 17:15:41 UTC (rev 39) @@ -0,0 +1,198 @@ +\documentclass[a4paper,11pt]{article} +\usepackage{graphicx} +\usepackage{a4wide} +\usepackage[colorlinks,linkcolor=blue,citecolor=red]{hyperref} +\usepackage{natbib} +\usepackage{float} +\title{Introduction to the \textbf{eventstudies} package in R} +\author{} +\begin{document} +%\VignetteIndexEntry{eventstudies: A package with functionality to do Event Studies} +%\VignetteDepends{} +%\VignetteKeywords{event studies} +%\VignettePackage{eventstudies} + +\maketitle +\newpage +\SweaveOpts{engine=R,pdf=TRUE} +\section{Introduction} +This is an introduction to eventstudies, a package in R which has functionality to convert a given dataset into +an event-time frame and to undertake further parametric/non-parametric analysis using various inference procedures. + +This paper describes how this package is used and provides several examples illustrating the use of +the functionality within this package. + +\section{phys2eventtime} +phys2eventtime is a function which takes a zoo object containing input data, a data frame containing +the date of occurance of the events and creates a data frame which is indexed according to the event +time. + +The following illustrates the use of phys2eventtime. +<<>>= +options(useFancyQuotes=FALSE) +library(eventstudies) +input.zoo.object <- structure(c(33.16, 34.0967, 35.3683, 34.46, 34.17, 35.89, 36.19, + 37.1317, 36.7033, 37.7933, 37.8533, 285.325, 292.6, + 290.025, 286.2, 290.075, 295.05, 289.325, 285.625, + 293.7, 298.5, 289.05, 704.5438, 708.35, 735.8375, + 710.625, 711.65, 731.0125, 727.575, 715.0187, 724.2, + 713.1875, 695.1812), .Dim = c(11L, 3L), .Dimnames = + list( NULL, c("ITC", "Reliance", "Infosys")), index = + structure(c(12418, 12419, 12422, 12423, 12424, 12425, + 12426, 12429, 12430, 12431, 12432), class = "Date"), + class = "zoo") +input.zoo.object +eventslist <- data.frame(unit=c("ITC","Reliance","Infosys", + "ITC","Reliance","Junk"), + when=as.Date(c( + "2004-01-02", "2004-01-08", "2004-01-14", + "2005-01-15", "2004-01-01", "2005-01-01"))) + +eventslist$unit <- as.character(eventslist$unit) +eventslist +@ +In this example we note the following about input.zoo.object and the data frame eventslist:- +\begin{enumerate} +\item Prior to event date(in eventslist\$when) there is only 1 reading in the corresponding unit in input.zoo.object. +\item The event date is within the range of available dates for the corresponding unit(this is the ideal case). +\item After the event date there's only 1 reading. +\item The date is not within the range. +\item There is no data prior to this date. +\item Unit does not exist in input.zoo.object. +\end{enumerate} + +This is exactly what the second component of phys2eventtime namely outcomes,reports. The first component of the result of phys2eventtime +is a zoo object which is the event time data frame. +<<>>== +a <- phys2eventtime(input.zoo.object, eventslist,width=0) +str(a) +a$z.e +a$outcomes + +@ +phys2eventtime has a third parameter namely width which allows for checking that no more than 4 consecutive missing observations +are there within the given width from the event time. + +What we expect if we don't use width handling:- +<<>>== +rawres <- structure(list(z.e = structure(c(NA, NA, NA, NA, NA, NA, + NA, NA, 33.16, 34.0967, 35.3683, 34.46, 34.17, 35.89, 36.19, + 37.1317, 36.7033, 37.7933, 37.8533, NA, NA, NA, NA, 285.325, 292.6, + 290.025, 286.2, 290.075, 295.05, 289.325, 285.625, 293.7, 298.5, + 289.05, NA, NA, NA, NA, 704.5438, 708.35, 735.8375, 710.625, 711.65, + 731.0125, 727.575, 715.0187, 724.2, 713.1875, 695.1812, NA, NA, NA, + NA, NA, NA, NA, NA), .Dim = c(19L, 3L), .Dimnames = list( NULL, + c("1", "2", "3")), index = -9:9, class = "zoo"), outcomes = + structure(c(1L, 1L, 1L, 3L, 3L, 2L), .Label = c("success", + "unitmissing", "wrongspan" ), class = "factor")), .Names = c("z.e", + "outcomes")) +rawres +@ +Check without the width handling -- +<<>>== +a <- phys2eventtime(input.zoo.object, eventslist,width=0) +a +all.equal(a, rawres) +@ +Check with width of 1 -- +<<>>== +a <- phys2eventtime(input.zoo.object, eventslist,width=1) +a +all.equal(a, rawres) +@ +But when we go to width=2, column 1 and 3 drop off because they have +only 1 obs before and after the event date respectively. + +<<>>== +a <- phys2eventtime(input.zoo.object, eventslist,width=2) +a +all.equal(a, structure(list(z.e = structure(c(NA, NA, NA, NA, 285.325, + 292.6, 290.025, 286.2, 290.075, 295.05, + 289.325, 285.625, 293.7, 298.5, 289.05, + NA, NA, NA, NA), index = -9:9, class = + "zoo"), outcomes = structure(c(3L, 1L, + 3L, 4L, 4L, 2L), .Label = c("success", + "unitmissing", "wdatamissing", + "wrongspan"), class = "factor")), .Names + = c("z.e", "outcomes" ))) +@ +\section{inference.Ecar} +Once we have an event time frame returned by phys2eventtime we may use inference.Ecar to do +bootstrap inference for the main graph of the event study. This is illustrated in the following example. +<<>>== +library(xts) +load(paste(system.file(package="eventstudies"),"data","inr.rda",sep="/")) +inr.returns<-diff(log(inr))[-1] +eventslist<-data.frame(unit=rep("inr",10), + when=as.Date(c( + "2010-04-20","2010-07-02","2010-07-27", + "2010-09-16","2010-11-02","2011-01-25", + "2011-03-17","2011-05-03","2011-06-16", + "2011-07-26"))) +event.time.data<-phys2eventtime(inr.returns,eventslist,width=10) +w<-window(event.time.data$z.e,start=-10,end=10) +inference.Ecar(w) +@ + +\section{identifyextremeevents} +% Conceptual framework +This package does eventstudy analysis but if the eventstudy analysis +is done on extreme events then it is very essential to understand the +basic pattern and distribution of the extreme events. Here, we define +extreme events as the upper or lower tail values which are estimated +with certain probability value. + +There are two further issues to consider. First, matters are +complicated by the fact that extreme (tail) values may cluster: for +example, there may be two or three consecutive days of very high or +very low daily returns, or these extremes may occur in two out of +three days. If the extreme values are all in the same tail of the +distribution, it might make sense to consider the cluster of extreme +values as a single event. + +We approach this problem through two paths. The data has following +events: clustered, un-clustered and mixed clusters. For simplicity, we +remove all the mixed clusters and deal with the rest. Un-clustered or +uncontaminated events are those where there is no other event within +the event window. Clustered events are defined by fusing all +consecutive extreme events, of the same direction, into a single +event. In event time, date +1 is then the first day after the run of +extreme events, and date -1 is the last day prior to the start of the +run. This strategy avoids losing observations of some of the most +important crises, which have clustered extreme events in the same +direction. + +This function gives following output: +\begin{itemize} +\item Summary statistics for complete data-set +\item Extreme event analysis for lower and upper tail i.e. bad-days + and good-days + \begin{itemize} + \item Dataset: Extreme events + \begin{itemize} + \item All extreme events + \item Clustered extreme events + \item Un-clustered extreme events + \end{itemize} + \item Distribution of clustered and un-clustered data: This measures + the number of cluster and un-cluster extreme events in the + data-set. Here, cluster which are not used are mixed clusters. + \item Run-length distribution of clusters: If there is a cluster + then run-length measures the length of consecutive extreme events + in a cluster. + \item Quantile values of extreme events: + \item Yearly distribution of extreme events: Here we have year-wise + distribution of extreme events and their median values. + \end{itemize} +\end{itemize} + +% Example for understanding +<<>>== +library(xts) +data(input.data) +input <- diff(log(input.data[,"sp500"])) +output <- identifyextremeevents(input, prob.value=5) +output +@ + +\end{document} From noreply at r-forge.r-project.org Mon Feb 11 18:30:05 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Mon, 11 Feb 2013 18:30:05 +0100 (CET) Subject: [Eventstudies-commits] r40 - pkg/vignettes Message-ID: <20130211173005.203A71844EC@r-forge.r-project.org> Author: vikram Date: 2013-02-11 18:30:04 +0100 (Mon, 11 Feb 2013) New Revision: 40 Modified: pkg/vignettes/eventstudies.Rnw Log: created a vignette folder Modified: pkg/vignettes/eventstudies.Rnw =================================================================== --- pkg/vignettes/eventstudies.Rnw 2013-02-11 17:15:41 UTC (rev 39) +++ pkg/vignettes/eventstudies.Rnw 2013-02-11 17:30:04 UTC (rev 40) @@ -189,8 +189,8 @@ % Example for understanding <<>>== library(xts) -data(input.data) -input <- diff(log(input.data[,"sp500"])) +data(inputdata) +input <- diff(log(inputdata[,"sp500"])) output <- identifyextremeevents(input, prob.value=5) output @ From noreply at r-forge.r-project.org Tue Feb 12 11:02:03 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 12 Feb 2013 11:02:03 +0100 (CET) Subject: [Eventstudies-commits] r41 - in pkg: data man vignettes Message-ID: <20130212100203.43BBE184950@r-forge.r-project.org> Author: vikram Date: 2013-02-12 11:02:02 +0100 (Tue, 12 Feb 2013) New Revision: 41 Added: pkg/data/sp500.rda pkg/man/sp500.Rd Removed: pkg/data/inputdata.rda pkg/man/inputdata.Rd Modified: pkg/man/identifyextremeevents.Rd pkg/vignettes/eventstudies.Rnw Log: Proper naming: Replaced inputdata to sp500 data Deleted: pkg/data/inputdata.rda =================================================================== (Binary files differ) Added: pkg/data/sp500.rda =================================================================== (Binary files differ) Property changes on: pkg/data/sp500.rda ___________________________________________________________________ Added: svn:mime-type + application/octet-stream Modified: pkg/man/identifyextremeevents.Rd =================================================================== --- pkg/man/identifyextremeevents.Rd 2013-02-11 17:30:04 UTC (rev 40) +++ pkg/man/identifyextremeevents.Rd 2013-02-12 10:02:02 UTC (rev 41) @@ -34,7 +34,7 @@ } \examples{ -data(inputdata) -input <- diff(log(inputdata[,"sp500"])) +data(sp500) +input <- diff(log(sp500)) output <- identifyextremeevents(input, prob.value=5) } Deleted: pkg/man/inputdata.Rd =================================================================== --- pkg/man/inputdata.Rd 2013-02-11 17:30:04 UTC (rev 40) +++ pkg/man/inputdata.Rd 2013-02-12 10:02:02 UTC (rev 41) @@ -1,19 +0,0 @@ -\name{inputdata} - -\docType{data} - -\alias{Identify extreme events data} - -\title{Times series data} - -\description{ - The time series data is available for S&P 500, VIX and Net flows of - FII as a percentage of total market capitalisation for India. The - sample range for the daily data is from 2000-02-10 to 2011-07-29. -} - -\usage{inputdata} - -\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} - -\keyword{datasets} Copied: pkg/man/sp500.Rd (from rev 39, pkg/man/inputdata.Rd) =================================================================== --- pkg/man/sp500.Rd (rev 0) +++ pkg/man/sp500.Rd 2013-02-12 10:02:02 UTC (rev 41) @@ -0,0 +1,17 @@ +\name{sp500} + +\docType{data} + +\alias{Identify extreme events data} + +\title{Times series data} + +\description{ + It is time series data for S&P 500 from 2000-02-10 to 2011-07-29. +} + +\usage{sp500} + +\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} + +\keyword{datasets} Modified: pkg/vignettes/eventstudies.Rnw =================================================================== --- pkg/vignettes/eventstudies.Rnw 2013-02-11 17:30:04 UTC (rev 40) +++ pkg/vignettes/eventstudies.Rnw 2013-02-12 10:02:02 UTC (rev 41) @@ -189,8 +189,8 @@ % Example for understanding <<>>== library(xts) -data(inputdata) -input <- diff(log(inputdata[,"sp500"])) +data(sp500) +input <- diff(log(sp500))*100 output <- identifyextremeevents(input, prob.value=5) output @ From noreply at r-forge.r-project.org Tue Feb 12 14:19:00 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 12 Feb 2013 14:19:00 +0100 (CET) Subject: [Eventstudies-commits] r42 - in pkg: inst man vignettes Message-ID: <20130212131900.B22F8184BC9@r-forge.r-project.org> Author: vikram Date: 2013-02-12 14:19:00 +0100 (Tue, 12 Feb 2013) New Revision: 42 Removed: pkg/inst/doc/ Modified: pkg/man/sp500.Rd pkg/vignettes/eventstudies.Rnw Log: Minor modifications Modified: pkg/man/sp500.Rd =================================================================== --- pkg/man/sp500.Rd 2013-02-12 10:02:02 UTC (rev 41) +++ pkg/man/sp500.Rd 2013-02-12 13:19:00 UTC (rev 42) @@ -12,6 +12,6 @@ \usage{sp500} -\format{An object with class attributes \code{xts} and \code{zoo} containing three variables with 2036 observations.} +\format{An object with class attributes \code{xts} and \code{zoo} containing a variable with 2036 observations.} \keyword{datasets} Modified: pkg/vignettes/eventstudies.Rnw =================================================================== --- pkg/vignettes/eventstudies.Rnw 2013-02-12 10:02:02 UTC (rev 41) +++ pkg/vignettes/eventstudies.Rnw 2013-02-12 13:19:00 UTC (rev 42) @@ -189,6 +189,7 @@ % Example for understanding <<>>== library(xts) +library(eventstudies) data(sp500) input <- diff(log(sp500))*100 output <- identifyextremeevents(input, prob.value=5) From noreply at r-forge.r-project.org Tue Feb 12 14:55:48 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Tue, 12 Feb 2013 14:55:48 +0100 (CET) Subject: [Eventstudies-commits] r43 - pkg/man Message-ID: <20130212135548.58A36181128@r-forge.r-project.org> Author: chiraganand Date: 2013-02-12 14:55:47 +0100 (Tue, 12 Feb 2013) New Revision: 43 Modified: pkg/man/sp500.Rd Log: Fixed the alias of the dataset, keeping CMD check happy. Modified: pkg/man/sp500.Rd =================================================================== --- pkg/man/sp500.Rd 2013-02-12 13:19:00 UTC (rev 42) +++ pkg/man/sp500.Rd 2013-02-12 13:55:47 UTC (rev 43) @@ -2,7 +2,7 @@ \docType{data} -\alias{Identify extreme events data} +\alias{sp500} \title{Times series data} @@ -10,8 +10,11 @@ It is time series data for S&P 500 from 2000-02-10 to 2011-07-29. } -\usage{sp500} +\usage{data(sp500)} \format{An object with class attributes \code{xts} and \code{zoo} containing a variable with 2036 observations.} +\examples{ + data(sp500) +} \keyword{datasets} From noreply at r-forge.r-project.org Wed Feb 13 12:15:07 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Wed, 13 Feb 2013 12:15:07 +0100 (CET) Subject: [Eventstudies-commits] r44 - in pkg: man vignettes Message-ID: <20130213111510.0541C1841A5@r-forge.r-project.org> Author: vikram Date: 2013-02-13 12:14:59 +0100 (Wed, 13 Feb 2013) New Revision: 44 Modified: pkg/man/identifyextremeevents.Rd pkg/vignettes/eventstudies.Rnw Log: Wrote conceptual framework, usage and output details for identifyextremeevents Modified: pkg/man/identifyextremeevents.Rd =================================================================== --- pkg/man/identifyextremeevents.Rd 2013-02-12 13:55:47 UTC (rev 43) +++ pkg/man/identifyextremeevents.Rd 2013-02-13 11:14:59 UTC (rev 44) @@ -18,8 +18,8 @@ \arguments{ \item{input}{'input' is the time-series on which extreme event analysis is done. This series should in returns format.} - \item{prob.value}{This is tail value for which the extreme event is to - be defined. For eg: prob.value of 5 will consider 5\% tail on both sides.} + \item{prob.value}{It is the tail value on the basis of which the + extreme event are defined. For eg: prob.value of 5 will consider 5\% tail on both sides.} } \value{ Modified: pkg/vignettes/eventstudies.Rnw =================================================================== --- pkg/vignettes/eventstudies.Rnw 2013-02-12 13:55:47 UTC (rev 43) +++ pkg/vignettes/eventstudies.Rnw 2013-02-13 11:14:59 UTC (rev 44) @@ -4,6 +4,7 @@ \usepackage[colorlinks,linkcolor=blue,citecolor=red]{hyperref} \usepackage{natbib} \usepackage{float} +\usepackage{tikz} \title{Introduction to the \textbf{eventstudies} package in R} \author{} \begin{document} @@ -136,11 +137,10 @@ \section{identifyextremeevents} % Conceptual framework -This package does eventstudy analysis but if the eventstudy analysis -is done on extreme events then it is very essential to understand the -basic pattern and distribution of the extreme events. Here, we define -extreme events as the upper or lower tail values which are estimated -with certain probability value. +\subsection{Conceptual framework} +This package identifies and interprets extreme events along with +eventstudy analysis. The upper tail and lower tail values are defined +as extreme events at certain probability. There are two further issues to consider. First, matters are complicated by the fact that extreme (tail) values may cluster: for @@ -162,38 +162,94 @@ important crises, which have clustered extreme events in the same direction. -This function gives following output: -\begin{itemize} -\item Summary statistics for complete data-set -\item Extreme event analysis for lower and upper tail i.e. bad-days - and good-days - \begin{itemize} - \item Dataset: Extreme events - \begin{itemize} - \item All extreme events - \item Clustered extreme events - \item Un-clustered extreme events - \end{itemize} - \item Distribution of clustered and un-clustered data: This measures - the number of cluster and un-cluster extreme events in the - data-set. Here, cluster which are not used are mixed clusters. - \item Run-length distribution of clusters: If there is a cluster - then run-length measures the length of consecutive extreme events - in a cluster. - \item Quantile values of extreme events: - \item Yearly distribution of extreme events: Here we have year-wise - distribution of extreme events and their median values. - \end{itemize} -\end{itemize} - % Example for understanding +\subsection{Usage} +This function does extreme event analysis on the returns of the +data. There are only two arguments the function has: +\begin{enumerate} +\item \textit{input}: Data on which extreme event analysis is done. Note: + \textit{input} should be in returns format. +\item \textit{prob.value}: It is the tail value on basis of which the + extreme event is are defined. For eg: prob.value of 5 will consider + 5\% tail on both sides. +\end{enumerate} <<>>== -library(xts) -library(eventstudies) data(sp500) input <- diff(log(sp500))*100 output <- identifyextremeevents(input, prob.value=5) -output @ +\subsection{Output} +Output is in list format. Primarily it consists of three lists, +summary statistics for complete data-set, extreme event analysis for +lower tail and extreme event analysis for upper tail. Further, these +lower tail and upper tail list objects consists of 5 more list objects with +following output: +\begin{enumerate} +\item Extreme events dataset +\item Distribution of clustered and unclustered +\item Run length distribution +\item Quantile values of extreme events +\item Yearly distribution of extreme events +\end{enumerate} +The complete set of analysis is done on the returns of S\&P500 and +these results are in tandem with Table 1,2,3,4 and 5 of Patnaik, Shah +and Singh (2013). + +\subsubsection{Summary statistics} +Here we have data summary for the complete data-set which shows +minimum, 5\%, 25\%, median, mean, 75\%, 95\%, maximum, standard +deviation (sd), inter-quartile range (IQR) and number of +observations. The output is shown below: +<<>>== +output$data.summary +@ +\subsubsection{Extreme events dataset} +The output for upper tail and lower tail are in the same format as +mentioned above. The data-set is an time series object which has 2 +columns. The first column is \textit{event.series} column which has +returns for extreme events and the second column is +\textit{cluster.pattern} which signifies the number of consecutive +days in the cluster. So, here we just show results for lower tail. +<<>>= +output$lower.tail$data +@ + +\subsubsection{Distribution of clustered and clustered events} +In the analysis we have clustered, unclustered and mixed clusters. We +remove the mixed clusters and study the rest of the clusters by fusing +them. Here we show, number of clustered and unclustered data used in +the analysis. The \textit{removed.clstr} refers to mixed cluster which +are removed and not used in the analysis.\textit{Tot.used} represents +total number of extreme events used for the analysis which is sum of +\textit{unclstr} (unclustered events) and \textit{used.clstr} (Used +clustered events). \textit{Tot} +are the total number of extreme events in the data-set. +<<>>= +output$lower.tail$extreme.event.distribution +@ + +\subsubsection{Run length distribution of clusters} +Clusters used in the analysis are defined as consecutive extreme +events. Run length shows total number of clusters with \textit{n} consecutive +days. In the example below we have 3 clusters with \textit{two} +consecutive events and 0 clusters with \textit{three} consecutive +events. +<<>>= +output$lower.tail$runlength +@ + +\subsubsection{Extreme event quantile values} +Quantile values show 0\%, 25\%, median, 75\%,100\% and mean values for +the extreme events data. +<<>>= +output$lower.tail$quantile.values +@ + +\subsubsection{Yearly distribution of extreme events} +This table shows the yearly wise distribution and +the median value for extreme events data. +<<>>= +output$lower.tail$yearly.extreme.event +@ \end{document} From noreply at r-forge.r-project.org Thu Feb 14 18:52:15 2013 From: noreply at r-forge.r-project.org (noreply at r-forge.r-project.org) Date: Thu, 14 Feb 2013 18:52:15 +0100 (CET) Subject: [Eventstudies-commits] r45 - in pkg: . R inst vignettes Message-ID: <20130214175215.9194B184B37@r-forge.r-project.org> Author: vikram Date: 2013-02-14 18:52:15 +0100 (Thu, 14 Feb 2013) New Revision: 45 Modified: pkg/DESCRIPTION pkg/R/identifyextremeevents.R pkg/inst/CITATION pkg/vignettes/eventstudies.Rnw Log: Minor modification in citation, DESCRIPTION Modified: pkg/DESCRIPTION =================================================================== --- pkg/DESCRIPTION 2013-02-13 11:14:59 UTC (rev 44) +++ pkg/DESCRIPTION 2013-02-14 17:52:15 UTC (rev 45) @@ -4,7 +4,7 @@ Version: 0.04 Date: 2011-06-20 Author: Ajay Shah, Vimal Balasubramaniam, Vikram Bahure -Maintainer: Vimal Balasubramaniam +Maintainer: Vimal Balasubramaniam , Vikram Bahure Depends: R (>= 2.12.0), zoo, xts, boot Description: Implementation of short and long term event study methodology License: GPL-2 Modified: pkg/R/identifyextremeevents.R =================================================================== --- pkg/R/identifyextremeevents.R 2013-02-13 11:14:59 UTC (rev 44) +++ pkg/R/identifyextremeevents.R 2013-02-14 17:52:15 UTC (rev 45) @@ -100,7 +100,7 @@ # Lower tail lower.tail$data <- list(left.all,left.tail.clustered, left.tail.unclustered) - names(lower.tail$data) <- c("All","Clustered","Un-clustered") + names(lower.tail$data) <- c("All","Clustered","Unclustered") lower.tail$extreme.event.distribution <- event.dist$lower.tail lower.tail$runlength <- runlength$lower.tail lower.tail$quantile.values <- qnt.values$lower.tail @@ -108,7 +108,7 @@ # Upper tail upper.tail$data <- list(right.all,right.tail.clustered, right.tail.unclustered) - names(upper.tail$data) <- c("All","Clustered","Un-clustered") + names(upper.tail$data) <- c("All","Clustered","Unclustered") upper.tail$extreme.event.distribution <- event.dist$upper.tail upper.tail$runlength <- runlength$upper.tail upper.tail$quantile.values <- qnt.values$upper.tail @@ -349,7 +349,7 @@ if(no.var==1){input <- xts(input)} # Creating empty frame: chassis tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input))) - colnames(tmp) <- colnames(input) + colnames(tmp) <- "summary" rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%", "Max","sd","IQR","Obs.") # Estimating summary statistics @@ -424,8 +424,8 @@ tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T))) index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y")) tmp.res <- merge(tmp.bad.y,tmp.good.y) - colnames(tmp.res) <- c("number.baddays","median.baddays", - "number.gooddays","median.goodays") + colnames(tmp.res) <- c("number.lowertail","median.lowertail", + "number.uppertail","median.uppertail") output <- as.data.frame(tmp.res) cn <- rownames(output) rownames(output) <- sapply(rownames(output), @@ -521,13 +521,13 @@ # Making a table tb <- data.frame(matrix(NA,2,6)) - colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used") + colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","tot","tot.used") rownames(tb) <- c("lower","upper") - tb[,"Tot"] <- c(tot.ev.l,tot.ev.r) + tb[,"tot"] <- c(tot.ev.l,tot.ev.r) tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r) tb[,"used.clstr"] <- c(us.cl.l,us.cl.r) - tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr - tb[,"tot.clstr"] <- tb$Tot-tb$unclstr + tb[,"tot.used"] <- tb$unclstr+tb$used.clstr + tb[,"tot.clstr"] <- tb$tot-tb$unclstr tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr return(tb) @@ -555,10 +555,10 @@ upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6)) colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max", "Mean") - rownames(lower.tail.qnt.value) <- colnames(input) + rownames(lower.tail.qnt.value) <- "extreme.events" colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max", "Mean") - rownames(upper.tail.qnt.value) <- colnames(input) + rownames(upper.tail.qnt.value) <- "extreme.events" # Estimating cluster count #-------------------- # Formatting clusters @@ -574,7 +574,7 @@ "event.series"] df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1)))) tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2) - rownames(tmp.left) <- NULL + rownames(tmp.left) <- "extreme.events" colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean") # Right tail tmp.right.tail <- tmp[which(tmp$right.tail==1), @@ -582,7 +582,7 @@ df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1)))) tmp.right <- round(cbind(df.right, mean(tmp.right.tail)),2) - rownames(tmp.right) <- NULL + rownames(tmp.right) <- "extreme.events" colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean") lower.tail.qnt.value <- tmp.left @@ -637,9 +637,9 @@ upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var, ncol=length(col.names))) colnames(lower.tail.runlength) <- col.names - rownames(lower.tail.runlength) <- colnames(input) + rownames(lower.tail.runlength) <- "clustered.events" colnames(upper.tail.runlength) <- col.names - rownames(upper.tail.runlength) <- colnames(input) + rownames(upper.tail.runlength) <- "clustered.events" #---------------------- # Run length estimation Modified: pkg/inst/CITATION =================================================================== --- pkg/inst/CITATION 2013-02-13 11:14:59 UTC (rev 44) +++ pkg/inst/CITATION 2013-02-14 17:52:15 UTC (rev 45) @@ -3,16 +3,16 @@ citEntry(entry="Article", title = "eventstudies: Infrastructure for performing Event Studies with R", author = personList(as.person("Ajay Shah"), - as.person("Ashim Kapoor"), + as.person("Vikram Bahure"), as.person("Vimal Balasubramaniam")), journal = "Journal of Statistical Software", - year = "2011", + year = "2013", volume = "", number = "", pages = "", url = "", textVersion = - paste("Ajay Shah,Ashim Kapoor,Vimal Balasubramaniam", + paste("Ajay Shah,Vikram Bahure,Vimal Balasubramaniam", "eventstudies: Infrastructure for performing Event Studies with R") ) Modified: pkg/vignettes/eventstudies.Rnw =================================================================== --- pkg/vignettes/eventstudies.Rnw 2013-02-13 11:14:59 UTC (rev 44) +++ pkg/vignettes/eventstudies.Rnw 2013-02-14 17:52:15 UTC (rev 45) @@ -138,9 +138,9 @@ \section{identifyextremeevents} % Conceptual framework \subsection{Conceptual framework} -This package identifies and interprets extreme events along with -eventstudy analysis. The upper tail and lower tail values are defined -as extreme events at certain probability. +This function of the package identifies extreme event and does data +analysis. The upper tail and lower tail values are defined as extreme +events at certain probability. There are two further issues to consider. First, matters are complicated by the fact that extreme (tail) values may cluster: for @@ -151,8 +151,8 @@ values as a single event. We approach this problem through two paths. The data has following -events: clustered, un-clustered and mixed clusters. For simplicity, we -remove all the mixed clusters and deal with the rest. Un-clustered or +events: clustered, unclustered and mixed clusters. For simplicity, we +remove all the mixed clusters and deal with the rest. Unclustered or uncontaminated events are those where there is no other event within the event window. Clustered events are defined by fusing all consecutive extreme events, of the same direction, into a single @@ -165,12 +165,12 @@ % Example for understanding \subsection{Usage} This function does extreme event analysis on the returns of the -data. There are only two arguments the function has: +data. Function has following two arguments: \begin{enumerate} \item \textit{input}: Data on which extreme event analysis is done. Note: \textit{input} should be in returns format. \item \textit{prob.value}: It is the tail value on basis of which the - extreme event is are defined. For eg: prob.value of 5 will consider + extreme events are defined. For eg: \textit{prob.value} of 5 will consider 5\% tail on both sides. \end{enumerate} <<>>==