[Eventstudies-commits] r31 - in pkg: R data man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Thu Feb 7 08:46:07 CET 2013
Author: vikram
Date: 2013-02-07 08:46:07 +0100 (Thu, 07 Feb 2013)
New Revision: 31
Added:
pkg/R/identifyExtremeEvents.R
pkg/data/IdentifyExevent.rda
pkg/man/exact.pattern.location.Rd
pkg/man/extreme.events.distribution.Rd
pkg/man/gen.data.Rd
pkg/man/get.cluster.distribution.Rd
pkg/man/get.clusters.formatted.Rd
pkg/man/get.event.count.Rd
pkg/man/identify.extreme.events.Rd
pkg/man/identify.mixedclusters.Rd
pkg/man/numbers2words.Rd
pkg/man/quantlie.extreme.values.Rd
pkg/man/runlength.dist.Rd
pkg/man/summarise.cluster.Rd
pkg/man/summarise.rle.Rd
pkg/man/sumstat.Rd
pkg/man/yearly.exevent.dist.Rd
pkg/man/yearly.exevent.summary.Rd
Log:
Added Rd documentation files, data and functions for Identify extreme events
Added: pkg/R/identifyExtremeEvents.R
===================================================================
--- pkg/R/identifyExtremeEvents.R (rev 0)
+++ pkg/R/identifyExtremeEvents.R 2013-02-07 07:46:07 UTC (rev 31)
@@ -0,0 +1,737 @@
+
+# Total 16 functions
+############################
+# Identifying extreme events
+############################
+# libraries required
+library(xts)
+#----------------------------------------------------------------
+# INPUT:
+# 'input' : Data series for which extreme events are
+# to be identified. More than one series
+# is permissble. The 'input' should be in time
+# series format.
+# 'prob.value': This is the tail value for which event is
+# to be defined. For eg: prob.value=5 will
+# consider 5% tail on both sides
+#-----------------------------------------------------------------
+# OUTPUT:
+# Result will be in a list of 3 with following tables:
+# 1. Summary statistics
+# a. Summary of whole data-set
+# 2. Lower tail: Extreme event tables
+# a. Distribution of extreme events
+# b. Run length distribution
+# c. Quantile values
+# d. Yearly distribution
+# e. Extreme event data
+# - Clustered, Un-clustered and Both
+# 3. Upper tail: Extreme event tables
+# a. Distribution of extreme events
+# b. Run length distribution
+# c. Quantile values
+# d. Yearly distribution
+# e. Extreme event data
+# - Clustered, Un-clustered and Both
+#------------------------------------------------------------------
+# NOTE:
+identify.extreme.events <- function(input,prob.value){
+ no.var <- NCOL(input)
+
+ #------------------------------------------------
+ # Breaking the function if any input is not given
+ #------------------------------------------------
+ # For one variable
+ # If class of data is not time series
+ class.input <- class(input)%in%c("xts","zoo")
+ if(class.input==FALSE){
+ stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo")
+ }
+
+ # Converting an xts object to zoo series
+ input.class <- length(which(class(input)%in%"xts"))
+ if(length(input.class)==1){
+ input <- zoo(input)
+ }
+
+ #-----------------------------------------
+ # Event series: Clustered and un-clustered
+ #-----------------------------------------
+ tmp <- get.clusters.formatted(event.series=input,
+ response.series=input,
+ probvalue=prob.value,
+ event.value="nonreturns",
+ response.value="nonreturns")
+ tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),]
+ clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),]
+ unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),]
+ # Left tail data
+ left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ # Right tail data
+ right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+ right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+ right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")]
+
+ #---------------------
+ # Extreme event output
+ #---------------------
+ # Summary statistics
+ summ.st <- sumstat(input)
+
+ # Distribtution of events
+ event.dist <- extreme.events.distribution(input,prob.value)
+
+ # Run length distribution
+ runlength <- runlength.dist(input,prob.value)
+
+ # Quantile extreme values
+ qnt.values <- quantile.extreme.values(input,prob.value)
+
+ # Yearly distribution of extreme event dates
+ yearly.exevent <- yearly.exevent.dist(input,prob.value)
+
+ #---------------------
+ # Compiling the output
+ #---------------------
+ output <- lower.tail <- upper.tail <- list()
+ # Compiling lower tail and upper tail separately
+ # Lower tail
+ lower.tail$data <- list(left.all,left.tail.clustered,
+ left.tail.unclustered)
+ names(lower.tail$data) <- c("All","Clustered","Un-clustered")
+ lower.tail$extreme.event.distribution <- event.dist$lower.tail
+ lower.tail$runlength <- runlength$lower.tail
+ lower.tail$quantile.values <- qnt.values$lower.tail
+ lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail
+ # Upper tail
+ upper.tail$data <- list(right.all,right.tail.clustered,
+ right.tail.unclustered)
+ names(upper.tail$data) <- c("All","Clustered","Un-clustered")
+ upper.tail$extreme.event.distribution <- event.dist$upper.tail
+ upper.tail$runlength <- runlength$upper.tail
+ upper.tail$quantile.values <- qnt.values$upper.tail
+ upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail
+ # Output
+ output$data.summary <- summ.st
+ output$lower.tail <- lower.tail
+ output$upper.tail <- upper.tail
+ return(output)
+}
+
+########################################
+# Functions used for formatting clusters
+########################################
+#------------------------
+# Categorzing tail events
+# for ES analysis
+#------------------------
+# Generates returns for the series
+# Mark left tail, right tail events
+gen.data <- function(d,probvalue,value="nonreturns"){
+ res <- data.frame(dates=index(d),value=coredata(d))
+ if(value=="returns"){
+ res$returns <- c(NA,coredata(diff(log(d))*100))
+ }else{
+ res$returns <- d
+ }
+ pval <- c(probvalue/100,(1-(probvalue/100)))
+ pval <- quantile(res$returns,prob=pval,na.rm=TRUE)
+ res$left.tail <- as.numeric(res$returns < pval[1])
+ res$right.tail <- as.numeric(res$returns > pval[2])
+ res$both.tails <- res$left.tail + res$right.tail
+ if(value=="returns"){
+ return(res[-1,])
+ }else{
+ return(res)
+ }
+}
+
+
+#-------------------
+# Summarise patterns
+summarise.rle <- function(oneseries){
+ tp <- rle(oneseries)
+ tp1 <- data.frame(tp$lengths,tp$values)
+ tp1 <- subset(tp1,tp1[,2]==1)
+ summary(tp1[,1])
+}
+
+# Summarise the pattern of cluster
+summarise.cluster <- function(obj){
+ rle.both <- summarise.rle(obj$both.tail)
+ rle.left <- summarise.rle(obj$left.tail)
+ rle.right <- summarise.rle(obj$right.tail)
+ rbind(both=rle.both,left=rle.left,right=rle.right)
+}
+
+# Getting location for the length
+exact.pattern.location <- function(us,pt,pt.len){
+ st <- rle(us)
+ len <- st$length
+ loc.cs <- cumsum(st$length)
+ loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1
+ return(loc)
+}
+
+# Identify and mark mixed clusters
+identify.mixedclusters <- function(m,j){
+ m$remove.mixed <- 0
+ rownum <- which(m$pattern==TRUE)
+ for(i in 1:length(rownum)){
+ nextnum <- rownum[i]+j-1
+ twonums <- m$returns[c(rownum[i]:nextnum)] > 0
+ if(sum(twonums)==j || sum(twonums)==0){
+ next
+ }else{
+ m$remove.mixed[c(rownum[i]:nextnum)] <- 5
+ }
+ }
+ m
+}
+
+#--------------------
+# Formatting clusters
+#--------------------
+# This function takes does the following transformation:
+#----------------------------------------------------
+# What the function does?
+# i. Get extreme events from event.series
+# ii. Remove all the mixed clusters
+# iii. Get different types cluster
+# iv. Further club the clusters for event series and
+# corresponding response series to get
+# clustered returns
+# v. Throw the output in timeseries format
+#----------------------------------------------------
+# Input for the function
+# event.series = Series in levels or returns on events
+# is to be defined
+# response.series = Series in levels or returns on which
+# response is to be generated
+# prob.value = Tail value for defining an event
+# event.value = What value is to be studied
+# returns or levels
+# Similarly for response.value
+#----------------------------------------------------
+# Output = Formatted clusters in time series format
+#----------------------------------------------------
+get.clusters.formatted <- function(event.series,
+ response.series,
+ probvalue=5,
+ event.value="returns",
+ response.value="returns"){
+ # Getting levels in event format
+ tmp <- gen.data(event.series,
+ probvalue=probvalue,
+ value=event.value)
+ res.ser <- gen.data(response.series,
+ probvalue=probvalue,
+ value=response.value)
+ # Storing old data points
+ tmp.old <- tmp
+
+ # Get pattern with maximum length
+ res <- summarise.cluster(tmp)
+ max.len <- max(res[,"Max."])
+
+ #------------------------
+ # Removing mixed clusters
+ #------------------------
+ for(i in max.len:2){
+ which.pattern <- rep(1,i)
+ patrn <- exact.pattern.location(tmp$both.tails,1,i)
+ # If pattern does not exist move to next pattern
+ if(length(patrn)==0){next}
+ tmp$pattern <- FALSE
+ tmp$pattern[patrn] <- TRUE
+ tmp <- identify.mixedclusters(m=tmp,i)
+ me <- length(which(tmp$remove.mixed==5))
+
+ if(me!=0){
+ tmp <- tmp[-which(tmp$remove.mixed==5),]
+ cat("Pattern of:",i,";",
+ "Disarded event:",me/i,"\n")
+ }
+ }
+ tmp.nc <- tmp
+
+ # Merging event and response series
+ tmp.es <- xts(tmp[,-1],as.Date(tmp$dates))
+ tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates))
+ tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")],
+ all=F)
+
+ # Formatting
+ if(event.value=="returns"){
+ which.value <- event.value
+ }else{
+ which.value <- "value"
+ }
+ # Converting to data.frame
+ temp <- as.data.frame(tmp.m)
+ temp$dates <- rownames(temp)
+ n <- temp
+ # Get pattern with maximum length
+ res <- summarise.cluster(temp)
+ max.len <- max(res[,"Max."])
+ cat("Maximum length after removing mixed clusters is",
+ max.len,"\n")
+ # Marking clusters
+ n$cluster.pattern <- n$both.tails
+ for(pt.len in max.len:1){
+ mark <- exact.pattern.location(n$both.tails,1,pt.len)
+ if(length(mark)==0){next}
+ n$cluster.pattern[mark] <- pt.len
+ }
+
+ #-------------------
+ # Clustering returns
+ #-------------------
+ print("Clustering events.")
+ for(pt.len in max.len:2){
+ rownum <- exact.pattern.location(n$both.tails,1,pt.len)
+ # If pattern does not exist
+ if(length(rownum)==0){
+ cat("Pattern",pt.len,"does not exist.","\n");next
+ }
+ # Clustering
+ while(length(rownum)>0){
+ prevnum <- rownum[1]-1
+ lastnum <- rownum[1]+pt.len-1
+ # Clustering event series
+ if(event.value=="returns"){
+ newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum]
+ n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns)
+ }else{
+ newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T)
+ n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns)
+ }
+ # Clustering response series
+ if(response.value=="returns"){
+ newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum]
+ n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns.rs)
+ }else{
+ newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T)
+ n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns)
+ }
+ n <- n[-c((rownum[1]+1):lastnum),]
+ rownum <- exact.pattern.location(n$both.tails,1,pt.len)
+ }
+ }
+ # Columns to keep
+ cn <- c(which.value,"left.tail","right.tail",
+ "returns.1","cluster.pattern")
+ tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates))
+ colnames(tmp.ts) <- c("event.series","left.tail","right.tail",
+ "response.series","cluster.pattern")
+
+ # Results
+ return(tmp.ts)
+}
+
+##############################
+# Summary statistics functions
+##############################
+#---------------------------------------------
+# Table 1: Summary statistics
+# INPUT: Time series data-set for which
+# summary statistics is to be estimated
+# OUTPUT: A data frame with:
+# - Values: "Minimum", 5%,"25%","Median",
+# "Mean","75%","95%","Maximum",
+# "Standard deviation","IQR",
+# "Observations"
+#----------------------------------------------
+sumstat <- function(input){
+ no.var <- NCOL(input)
+ if(no.var==1){input <- xts(input)}
+ # Creating empty frame: chassis
+ tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input)))
+ colnames(tmp) <- colnames(input)
+ rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%",
+ "Max","sd","IQR","Obs.")
+ # Estimating summary statistics
+ tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)})
+ tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)})
+ tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)})
+ tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)})
+ tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)})
+ tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)})
+ tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)})
+ tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)})
+ tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)})
+ tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)})
+ tmp[11,] <- apply(input,2,function(x){NROW(x)})
+ tmp <- round(tmp,2)
+
+ return(tmp)
+}
+
+######################
+# Yearly summary stats
+######################
+#----------------------------
+# INPUT:
+# 'input': Data series for which event cluster distribution
+# is to be calculated;
+# 'prob.value': Probility value for which tail is to be constructed this
+# value is equivalent to one side tail for eg. if prob.value=5
+# then we have values of 5% tail on both sides
+# Functions used: yearly.exevent.summary()
+# OUTPUT:
+# Yearly distribution of extreme events
+#----------------------------
+yearly.exevent.dist <- function(input, prob.value){
+ no.var <- NCOL(input)
+ mylist <- list()
+ # Estimating cluster count
+ #--------------------
+ # Formatting clusters
+ #--------------------
+ tmp <- get.clusters.formatted(event.series=input,
+ response.series=input,
+ probvalue=prob.value,
+ event.value="nonreturns",
+ response.value="nonreturns")
+
+ tmp.res <- yearly.exevent.summary(tmp)
+ tmp.res[is.na(tmp.res)] <- 0
+ # Left and right tail
+ lower.tail.yearly.exevent <- tmp.res[,1:2]
+ upper.tail.yearly.exevent <- tmp.res[,3:4]
+ output <- list()
+ output$lower.tail <- lower.tail.yearly.exevent
+ output$upper.tail <- upper.tail.yearly.exevent
+ mylist <- output
+
+ return(mylist)
+}
+
+#------------------------------------------------
+# Get yearly no. and median for good and bad days
+#------------------------------------------------
+yearly.exevent.summary <- function(tmp){
+ tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
+ tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
+ # Bad days
+ tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
+ tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
+ index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
+ # Good days
+ tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
+ tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
+ index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
+ tmp.res <- merge(tmp.bad.y,tmp.good.y)
+ colnames(tmp.res) <- c("number.baddays","median.baddays",
+ "number.gooddays","median.goodays")
+ output <- as.data.frame(tmp.res)
+ cn <- rownames(output)
+ rownames(output) <- sapply(rownames(output),
+ function(x)substr(x,nchar(x)-3,nchar(x)))
+ return(output)
+}
+
+#############################
+# Getting event segregation
+# - clustered and unclustered
+#############################
+#----------------------------
+# INPUT:
+# 'input': Data series for which event cluster distribution
+# is to be calculated;
+# Note: The input series expects the input to be in levels not in returns,
+# if the some the inputs are already in return formats one has to
+# use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+# return series in the data-set
+# 'prob.value': Probility value for which tail is to be constructed this
+# value is equivalent to one side tail for eg. if prob.value=5
+# then we have values of 5% tail on both sides
+# Functions used: get.event.count()
+# OUTPUT:
+# Distribution of extreme events
+#----------------------------
+
+extreme.events.distribution <- function(input,prob.value){
+ # Creating an empty frame
+ no.var <- NCOL(input)
+ lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+ upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+ colnames(lower.tail.dist) <- c("Unclustered","Used clusters",
+ "Removed clusters","Total clusters",
+ "Total","Total used clusters")
+ rownames(lower.tail.dist) <- colnames(input)
+ colnames(upper.tail.dist) <- c("Unclustered","Used clusters",
+ "Removed clusters","Total clusters",
+ "Total","Total used clusters")
+ rownames(upper.tail.dist) <- colnames(input)
+ # Estimating cluster count
+ #--------------
+ # Cluster count
+ #--------------
+ # Non-returns (if it is already in return format)
+ tmp <- get.event.count(input,probvalue=prob.value,
+ value="nonreturns")
+ lower.tail.dist <- tmp[1,]
+ upper.tail.dist <- tmp[2,]
+
+ #-----------------------------
+ # Naming the tail distribution
+ #-----------------------------
+ mylist <- list(lower.tail.dist,upper.tail.dist)
+ names(mylist) <- c("lower.tail", "upper.tail")
+ return(mylist)
+}
+
+# Functions used in event count calculation
+get.event.count <- function(series,
+ probvalue=5,
+ value="returns"){
+ # Extracting dataset
+ tmp.old <- gen.data(series,probvalue,value)
+ tmp <- get.clusters.formatted(event.series=series,
+ response.series=series,
+ probvalue,
+ event.value=value,
+ response.value=value)
+
+ cp <- tmp[,"cluster.pattern"]
+ lvl <- as.numeric(levels(as.factor(cp)))
+ lvl.use <- lvl[which(lvl>1)]
+ # Calculating Total events
+ tot.ev.l <- length(which(tmp.old[,"left.tail"]==1))
+ tot.ev.r <- length(which(tmp.old[,"right.tail"]==1))
+ # Calculating Unclustered events
+ un.clstr.l <- length(which(tmp[,"left.tail"]==1 &
+ tmp[,"cluster.pattern"]==1))
+ un.clstr.r <- length(which(tmp[,"right.tail"]==1 &
+ tmp[,"cluster.pattern"]==1))
+ # Calculating Used clusters
+ us.cl.l <- us.cl.r <- NULL
+ for(i in 1:length(lvl.use)){
+ tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
+ tmp[,"left.tail"]==1))*lvl.use[i]
+ tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
+ tmp[,"right.tail"]==1))*lvl.use[i]
+ us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE)
+ us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE)
+ }
+
+ # Making a table
+ tb <- data.frame(matrix(NA,2,6))
+ colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used")
+ rownames(tb) <- c("lower","upper")
+ tb[,"Tot"] <- c(tot.ev.l,tot.ev.r)
+ tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r)
+ tb[,"used.clstr"] <- c(us.cl.l,us.cl.r)
+ tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr
+ tb[,"tot.clstr"] <- tb$Tot-tb$unclstr
+ tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr
+
+ return(tb)
+}
+
+####################################
+# Quantile values for extreme events
+####################################
+#-----------------------------------
+# INPUT:
+# 'input': Data series in time series format
+# Note: The input series expects the input to be in levels not in returns,
+# if the some the inputs are already in return formats one has to
+# use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+# return series in the data-set
+# Functions used: get.clusters.formatted()
+# OUTPUT:
+# Lower tail and Upper tail quantile values
+#-----------------------------------
+quantile.extreme.values <- function(input, prob.value){
+ # Creating an empty frame
+ no.var <- NCOL(input)
+ lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+ upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
+ colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
+ "Mean")
+ rownames(lower.tail.qnt.value) <- colnames(input)
+ colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
+ "Mean")
+ rownames(upper.tail.qnt.value) <- colnames(input)
+ # Estimating cluster count
+ #--------------------
+ # Formatting clusters
+ #--------------------
+ tmp <- get.clusters.formatted(event.series=input,
+ response.series=input,
+ probvalue=prob.value,
+ event.value="nonreturns",
+ response.value="nonreturns")
+
+ # Left tail
+ tmp.left.tail <- tmp[which(tmp$left.tail==1),
+ "event.series"]
+ df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1))))
+ tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2)
+ rownames(tmp.left) <- NULL
+ colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean")
+ # Right tail
+ tmp.right.tail <- tmp[which(tmp$right.tail==1),
+ "event.series"]
+ df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1))))
+ tmp.right <- round(cbind(df.right,
+ mean(tmp.right.tail)),2)
+ rownames(tmp.right) <- NULL
+ colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean")
+
+ lower.tail.qnt.value <- tmp.left
+ upper.tail.qnt.value <- tmp.right
+
+ mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value)
+ names(mylist) <- c("lower.tail", "upper.tail")
+ return(mylist)
+}
+
+##########################
+# Run length distribution
+##########################
+#-----------------------------------
+# INPUT:
+# 'input': Data series in time series format
+# Note: The input series expects the input to be in levels not in returns,
+# if the some the inputs are already in return formats one has to
+# use the other variable 'already.return.series'
+# 'already.return.series': column name is to be given which already has
+# return series in the data-set
+# Functions used: get.clusters.formatted()
+# get.cluster.distribution()
+# numbers2words()
+# OUTPUT:
+# Lower tail and Upper tail Run length distribution
+#-----------------------------------
+runlength.dist <- function(input, prob.value){
+
+ # Creating an empty frame
+ no.var <- NCOL(input)
+
+ # Finding maximum Run length
+ # Seed value
+ max.runlength <- 0
+ #---------------------------
+ # Estimating max. Run length
+ #---------------------------
+ tmp <- get.clusters.formatted(event.series=input,
+ response.series=input,
+ probvalue=prob.value,
+ event.value="nonreturns",
+ response.value="nonreturns")
+
+ tmp.runlength <- get.cluster.distribution(tmp,"event.series")
+ max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)]))
+
+ # Generating empty frame
+ col.names <- seq(2:max.runlength)+1
+ lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+ ncol=length(col.names)))
+ upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
+ ncol=length(col.names)))
+ colnames(lower.tail.runlength) <- col.names
+ rownames(lower.tail.runlength) <- colnames(input)
+ colnames(upper.tail.runlength) <- col.names
+ rownames(upper.tail.runlength) <- colnames(input)
+
+ #----------------------
+ # Run length estimation
+ #----------------------
+ tmp.res <- get.cluster.distribution(tmp,"event.series")
+ for(j in 1:length(colnames(tmp.res))){
+ col.number <- colnames(tmp.res)[j]
+ lower.tail.runlength[1,col.number] <- tmp.res[1,col.number]
+ upper.tail.runlength[1,col.number] <- tmp.res[2,col.number]
+ }
+
+ # Replacing NA's with zeroes
+ lower.tail.runlength[is.na(lower.tail.runlength)] <- 0
+ upper.tail.runlength[is.na(upper.tail.runlength)] <- 0
+
+ # creating column names
+ word.cn <- NULL
+ for(i in 1:length(col.names)){
+ word.cn[i] <- numbers2words(col.names[i])
+ }
+ colnames(lower.tail.runlength) <- word.cn
+ colnames(upper.tail.runlength) <- word.cn
+ mylist <- list(lower.tail.runlength,upper.tail.runlength)
+ names(mylist) <- c("lower.tail", "upper.tail")
+ return(mylist)
+}
+
+#-------------------------
+# Get cluster distribution
+#-------------------------
+# Input for this function is the output of get.cluster.formatted
+get.cluster.distribution <- function(tmp,variable){
+ # Extract cluster category
+ cp <- tmp[,"cluster.pattern"]
+ lvl <- as.numeric(levels(as.factor(cp)))
+ lvl.use <- lvl[which(lvl>1)]
+ # Get numbers for each category
+ tb <- data.frame(matrix(NA,2,length(lvl.use)))
+ colnames(tb) <- as.character(lvl.use)
+ rownames(tb) <- c(paste(variable,":lower tail"),
+ paste(variable,":upper tail"))
+ for(i in 1:length(lvl.use)){
+ tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
+ & tmp[,"left.tail"]==1))
+ tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
+ & tmp[,"right.tail"]==1))
+
+ }
+ return(tb)
+}
+
+#----------------------------
+# Converting numbers to words
+#----------------------------
+numbers2words <- function(x){
+ helper <- function(x){
+ digits <- rev(strsplit(as.character(x), "")[[1]])
+ nDigits <- length(digits)
+ if (nDigits == 1) as.vector(ones[digits])
+ else if (nDigits == 2)
+ if (x <= 19) as.vector(teens[digits[1]])
+ else trim(paste(tens[digits[2]],
+ Recall(as.numeric(digits[1]))))
+ else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred",
+ Recall(makeNumber(digits[2:1]))))
+ else {
+ nSuffix <- ((nDigits + 2) %/% 3) - 1
+ if (nSuffix > length(suffixes)) stop(paste(x, "is too large!"))
+ trim(paste(Recall(makeNumber(digits[
+ nDigits:(3*nSuffix + 1)])),
+ suffixes[nSuffix],
+ Recall(makeNumber(digits[(3*nSuffix):1]))))
+ }
+ }
+ trim <- function(text){
+ gsub("^\ ", "", gsub("\ *$", "", text))
+ }
+ makeNumber <- function(...) as.numeric(paste(..., collapse=""))
+ opts <- options(scipen=100)
+ on.exit(options(opts))
+ ones <- c("", "one", "two", "three", "four", "five", "six", "seven",
+ "eight", "nine")
+ names(ones) <- 0:9
+ teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+ "sixteen", " seventeen", "eighteen", "nineteen")
+ names(teens) <- 0:9
+ tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy",
+ "eighty",
+ "ninety")
+ names(tens) <- 2:9
+ x <- round(x)
+ suffixes <- c("thousand", "million", "billion", "trillion")
+ if (length(x) > 1) return(sapply(x, helper))
+ helper(x)
+}
Added: pkg/data/IdentifyExevent.rda
===================================================================
(Binary files differ)
Property changes on: pkg/data/IdentifyExevent.rda
___________________________________________________________________
Added: svn:mime-type
+ application/octet-stream
Added: pkg/man/exact.pattern.location.Rd
===================================================================
--- pkg/man/exact.pattern.location.Rd (rev 0)
+++ pkg/man/exact.pattern.location.Rd 2013-02-07 07:46:07 UTC (rev 31)
@@ -0,0 +1,39 @@
+\name{exact.pattern.location}
+\alias{exact.pattern.location}
+
+\title{
+ Gets the location of the pattern
+}
+
+\description{
+ The function gives the exact location of the exact pattern in the series.
+ }
+
+\usage{
+exact.pattern.location(us,pt,pt.len)
+}
+
+\arguments{
+ \item{us}{It is the series in which the location of the pattern is to
+ be found}
+ \item{pt}{It is the pattern which is to be searched in the series for
+ eg. 1}
+ \item{pt.len}{It is the length of the pattern which is to be searched
+ for}
+}
+
+\value{
+Output is the row number of the start of the pattern
+}
+
+
+\examples{
+# Loading data
+data(IdentifyExevent)
+# Series input
+input <- diff(log(input.data[,"sp500"]))
+# Marking left tail and right tail
+res <- gen.data(d=input,probvalue=5,value="nonreturns")
+# Getting summarised run length
+output <- exact.pattern.location(res$left.tail, pt=1, pt.len=2)
+}
\ No newline at end of file
Added: pkg/man/extreme.events.distribution.Rd
===================================================================
--- pkg/man/extreme.events.distribution.Rd (rev 0)
+++ pkg/man/extreme.events.distribution.Rd 2013-02-07 07:46:07 UTC (rev 31)
@@ -0,0 +1,40 @@
+\name{extreme.events.distribution}
+\alias{extreme.events.distribution}
+
+\title{
+ Estimating extreme event distribution of clustered and unclustered data
+}
+
+\description{
+ It is the wrapper over the get.event.count function for estimating
+ extreme event distribution of clustered and unclustered data
+
+ }
+
+\usage{
+extreme.events.distribution(input, prob.value)
+}
+
+\arguments{
+ \item{input}{Series for which cluster distribution is to be checked}
+ \item{prob.value}{It is tail value for which the extreme event is to
+ be defined. For eg: prob.value of 5 will consider 5\% tail on both
+ sides}
+}
+
+\value{
+Output will be the distribution of clustered and unclustered extreme
+event of the series
+}
+
+\seealso{
+ get.event.count
+}
+
+\examples{
+data(IdentifyExevent)
+# Series input
+input <- diff(log(input.data[,"sp500"]))
+output <- extreme.events.distribution(input,prob.value=5)
+
+}
\ No newline at end of file
Added: pkg/man/gen.data.Rd
===================================================================
--- pkg/man/gen.data.Rd (rev 0)
+++ pkg/man/gen.data.Rd 2013-02-07 07:46:07 UTC (rev 31)
@@ -0,0 +1,41 @@
+\name{gen.data}
+\alias{gen.data}
+
+\title{
+Marking upper and lower tail events for extreme event analysis.
+}
+
+\description{
+This function generates a column as left.tail and right.tail which has
+binary numbers. If the observation belongs to left tail then the
+left.tail will be 1 else 0.
+ }
+
+\usage{
+gen.data(d,probvalue,value)
+}
+
+\arguments{
+ \item{d}{'d' is the time-series on which extreme event analysis is
+ done.}
+ \item{probvalue}{It is tail value for which the extreme event is to
+ be defined. For eg: prob.value of 5 will consider 5\% tail on both
+ sides.}
+ \item{value}{If the series 'd' is in returns format then
+ value="nonreturns" else value="returns"}
+}
+
+\value{
+ Output is a data frame with columns as date, value, returns, left.tail
+ which is left tail dummy simmilarly for right tail and both tails.
+
+}
+
+
+\examples{
+data(IdentifyExevent)
+# Series input
+input <- diff(log(input.data[,"sp500"]))
+# Marking left tail and right tail
+res <- gen.data(d=input,probvalue=5,value="nonreturns")
+}
\ No newline at end of file
Added: pkg/man/get.cluster.distribution.Rd
===================================================================
--- pkg/man/get.cluster.distribution.Rd (rev 0)
+++ pkg/man/get.cluster.distribution.Rd 2013-02-07 07:46:07 UTC (rev 31)
@@ -0,0 +1,38 @@
+\name{get.cluster.distribution}
+\alias{get.cluster.distribution}
+
+\title{
+ Estimating runlength distribution for the clusters
+}
+
+\description{
+ Estimating runlength distribution of the clusters in the extreme
+ event.
+ }
+
+\usage{
+get.cluster.distribution(tmp, variable)
+}
+
+\arguments{
+ \item{tmp}{It is the output of the get.clusters.formatted}
+ \item{variable}{Variable on which cluster distribution is to be
+ estimated; variable="event.series"}
+}
+
+\value{
+ Output is the runlength distribution for the clusters
+}
+
+\seealso{
+ get.clusters.formatted
+}
+
+\examples{
+data(IdentifyExevent)
+# Series input
+input <- diff(log(input.data[,"sp500"]))
+tmp <- get.clusters.formatted(event.series=input,response.series=input,
+probvalue=5,event.value="nonreturns",response.value="nonreturns")
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/eventstudies -r 31
More information about the Eventstudies-commits
mailing list