[Eventstudies-commits] r35 - in pkg: . R inst/doc man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Feb 11 13:19:25 CET 2013
Author: vikram
Date: 2013-02-11 13:19:25 +0100 (Mon, 11 Feb 2013)
New Revision: 35
Added:
pkg/R/identifyextremeevents.R
pkg/man/identifyextremeevents.Rd
Removed:
pkg/R/identify.extreme.events.R
pkg/man/identify.extreme.events.Rd
Modified:
pkg/NAMESPACE
pkg/inst/doc/eventstudies.Rnw
pkg/man/eventstudy-package.Rd
Log:
Written conceptual framework for identify extreme events and example for the same; still need to resolve couple of errors; Work in progress
Modified: pkg/NAMESPACE
===================================================================
--- pkg/NAMESPACE 2013-02-08 10:12:51 UTC (rev 34)
+++ pkg/NAMESPACE 2013-02-11 12:19:25 UTC (rev 35)
@@ -1,3 +1,3 @@
-export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identify.extreme.events)
-S3method(identify, extreme.events)
+export(inference.Ecar, phys2eventtime, remap.cumsum, remap.cumprod, remap.event.reindex, identifyextremeevents)
+
Deleted: pkg/R/identify.extreme.events.R
===================================================================
--- pkg/R/identify.extreme.events.R 2013-02-08 10:12:51 UTC (rev 34)
+++ pkg/R/identify.extreme.events.R 2013-02-11 12:19:25 UTC (rev 35)
@@ -1,737 +0,0 @@
-
-# Total 16 functions
-############################
-# Identifying extreme events
-############################
-# libraries required
-library(xts)
-#----------------------------------------------------------------
-# INPUT:
-# 'input' : Data series for which extreme events are
-# to be identified. More than one series
-# is permissble. The 'input' should be in time
-# series format.
-# 'prob.value': This is the tail value for which event is
-# to be defined. For eg: prob.value=5 will
-# consider 5% tail on both sides
-#-----------------------------------------------------------------
-# OUTPUT:
-# Result will be in a list of 3 with following tables:
-# 1. Summary statistics
-# a. Summary of whole data-set
-# 2. Lower tail: Extreme event tables
-# a. Distribution of extreme events
-# b. Run length distribution
-# c. Quantile values
-# d. Yearly distribution
-# e. Extreme event data
-# - Clustered, Un-clustered and Both
-# 3. Upper tail: Extreme event tables
-# a. Distribution of extreme events
-# b. Run length distribution
-# c. Quantile values
-# d. Yearly distribution
-# e. Extreme event data
-# - Clustered, Un-clustered and Both
-#------------------------------------------------------------------
-# NOTE:
-identify.extreme.events <- function(input,prob.value){
- no.var <- NCOL(input)
-
- #------------------------------------------------
- # Breaking the function if any input is not given
- #------------------------------------------------
- # For one variable
- # If class of data is not time series
- class.input <- class(input)%in%c("xts","zoo")
- if(class.input==FALSE){
- stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo")
- }
-
- # Converting an xts object to zoo series
- input.class <- length(which(class(input)%in%"xts"))
- if(length(input.class)==1){
- input <- zoo(input)
- }
-
- #-----------------------------------------
- # Event series: Clustered and un-clustered
- #-----------------------------------------
- tmp <- get.clusters.formatted(event.series=input,
- response.series=input,
- probvalue=prob.value,
- event.value="nonreturns",
- response.value="nonreturns")
- tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),]
- clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),]
- unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),]
- # Left tail data
- left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
- left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
- left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")]
- # Right tail data
- right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
- right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
- right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")]
-
- #---------------------
- # Extreme event output
- #---------------------
- # Summary statistics
- summ.st <- sumstat(input)
-
- # Distribtution of events
- event.dist <- extreme.events.distribution(input,prob.value)
-
- # Run length distribution
- runlength <- runlength.dist(input,prob.value)
-
- # Quantile extreme values
- qnt.values <- quantile.extreme.values(input,prob.value)
-
- # Yearly distribution of extreme event dates
- yearly.exevent <- yearly.exevent.dist(input,prob.value)
-
- #---------------------
- # Compiling the output
- #---------------------
- output <- lower.tail <- upper.tail <- list()
- # Compiling lower tail and upper tail separately
- # Lower tail
- lower.tail$data <- list(left.all,left.tail.clustered,
- left.tail.unclustered)
- names(lower.tail$data) <- c("All","Clustered","Un-clustered")
- lower.tail$extreme.event.distribution <- event.dist$lower.tail
- lower.tail$runlength <- runlength$lower.tail
- lower.tail$quantile.values <- qnt.values$lower.tail
- lower.tail$yearly.extreme.event <- yearly.exevent$lower.tail
- # Upper tail
- upper.tail$data <- list(right.all,right.tail.clustered,
- right.tail.unclustered)
- names(upper.tail$data) <- c("All","Clustered","Un-clustered")
- upper.tail$extreme.event.distribution <- event.dist$upper.tail
- upper.tail$runlength <- runlength$upper.tail
- upper.tail$quantile.values <- qnt.values$upper.tail
- upper.tail$yearly.extreme.event <- yearly.exevent$upper.tail
- # Output
- output$data.summary <- summ.st
- output$lower.tail <- lower.tail
- output$upper.tail <- upper.tail
- return(output)
-}
-
-########################################
-# Functions used for formatting clusters
-########################################
-#------------------------
-# Categorzing tail events
-# for ES analysis
-#------------------------
-# Generates returns for the series
-# Mark left tail, right tail events
-gen.data <- function(d,probvalue,value="nonreturns"){
- res <- data.frame(dates=index(d),value=coredata(d))
- if(value=="returns"){
- res$returns <- c(NA,coredata(diff(log(d))*100))
- }else{
- res$returns <- d
- }
- pval <- c(probvalue/100,(1-(probvalue/100)))
- pval <- quantile(res$returns,prob=pval,na.rm=TRUE)
- res$left.tail <- as.numeric(res$returns < pval[1])
- res$right.tail <- as.numeric(res$returns > pval[2])
- res$both.tails <- res$left.tail + res$right.tail
- if(value=="returns"){
- return(res[-1,])
- }else{
- return(res)
- }
-}
-
-
-#-------------------
-# Summarise patterns
-summarise.rle <- function(oneseries){
- tp <- rle(oneseries)
- tp1 <- data.frame(tp$lengths,tp$values)
- tp1 <- subset(tp1,tp1[,2]==1)
- summary(tp1[,1])
-}
-
-# Summarise the pattern of cluster
-summarise.cluster <- function(obj){
- rle.both <- summarise.rle(obj$both.tail)
- rle.left <- summarise.rle(obj$left.tail)
- rle.right <- summarise.rle(obj$right.tail)
- rbind(both=rle.both,left=rle.left,right=rle.right)
-}
-
-# Getting location for the length
-exact.pattern.location <- function(us,pt,pt.len){
- st <- rle(us)
- len <- st$length
- loc.cs <- cumsum(st$length)
- loc <- loc.cs[which(st$values==pt & st$length==pt.len)]-pt.len+1
- return(loc)
-}
-
-# Identify and mark mixed clusters
-identify.mixedclusters <- function(m,j){
- m$remove.mixed <- 0
- rownum <- which(m$pattern==TRUE)
- for(i in 1:length(rownum)){
- nextnum <- rownum[i]+j-1
- twonums <- m$returns[c(rownum[i]:nextnum)] > 0
- if(sum(twonums)==j || sum(twonums)==0){
- next
- }else{
- m$remove.mixed[c(rownum[i]:nextnum)] <- 5
- }
- }
- m
-}
-
-#--------------------
-# Formatting clusters
-#--------------------
-# This function takes does the following transformation:
-#----------------------------------------------------
-# What the function does?
-# i. Get extreme events from event.series
-# ii. Remove all the mixed clusters
-# iii. Get different types cluster
-# iv. Further club the clusters for event series and
-# corresponding response series to get
-# clustered returns
-# v. Throw the output in timeseries format
-#----------------------------------------------------
-# Input for the function
-# event.series = Series in levels or returns on events
-# is to be defined
-# response.series = Series in levels or returns on which
-# response is to be generated
-# prob.value = Tail value for defining an event
-# event.value = What value is to be studied
-# returns or levels
-# Similarly for response.value
-#----------------------------------------------------
-# Output = Formatted clusters in time series format
-#----------------------------------------------------
-get.clusters.formatted <- function(event.series,
- response.series,
- probvalue=5,
- event.value="returns",
- response.value="returns"){
- # Getting levels in event format
- tmp <- gen.data(event.series,
- probvalue=probvalue,
- value=event.value)
- res.ser <- gen.data(response.series,
- probvalue=probvalue,
- value=response.value)
- # Storing old data points
- tmp.old <- tmp
-
- # Get pattern with maximum length
- res <- summarise.cluster(tmp)
- max.len <- max(res[,"Max."])
-
- #------------------------
- # Removing mixed clusters
- #------------------------
- for(i in max.len:2){
- which.pattern <- rep(1,i)
- patrn <- exact.pattern.location(tmp$both.tails,1,i)
- # If pattern does not exist move to next pattern
- if(length(patrn)==0){next}
- tmp$pattern <- FALSE
- tmp$pattern[patrn] <- TRUE
- tmp <- identify.mixedclusters(m=tmp,i)
- me <- length(which(tmp$remove.mixed==5))
-
- if(me!=0){
- tmp <- tmp[-which(tmp$remove.mixed==5),]
- cat("Pattern of:",i,";",
- "Disarded event:",me/i,"\n")
- }
- }
- tmp.nc <- tmp
-
- # Merging event and response series
- tmp.es <- xts(tmp[,-1],as.Date(tmp$dates))
- tmp.rs <- xts(res.ser[,-1],as.Date(res.ser$dates))
- tmp.m <- merge(tmp.es,res.ser=tmp.rs[,c("value","returns")],
- all=F)
-
- # Formatting
- if(event.value=="returns"){
- which.value <- event.value
- }else{
- which.value <- "value"
- }
- # Converting to data.frame
- temp <- as.data.frame(tmp.m)
- temp$dates <- rownames(temp)
- n <- temp
- # Get pattern with maximum length
- res <- summarise.cluster(temp)
- max.len <- max(res[,"Max."])
- cat("Maximum length after removing mixed clusters is",
- max.len,"\n")
- # Marking clusters
- n$cluster.pattern <- n$both.tails
- for(pt.len in max.len:1){
- mark <- exact.pattern.location(n$both.tails,1,pt.len)
- if(length(mark)==0){next}
- n$cluster.pattern[mark] <- pt.len
- }
-
- #-------------------
- # Clustering returns
- #-------------------
- print("Clustering events.")
- for(pt.len in max.len:2){
- rownum <- exact.pattern.location(n$both.tails,1,pt.len)
- # If pattern does not exist
- if(length(rownum)==0){
- cat("Pattern",pt.len,"does not exist.","\n");next
- }
- # Clustering
- while(length(rownum)>0){
- prevnum <- rownum[1]-1
- lastnum <- rownum[1]+pt.len-1
- # Clustering event series
- if(event.value=="returns"){
- newreturns <- (n$value[lastnum]-n$value[prevnum])*100/n$value[prevnum]
- n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns)
- }else{
- newreturns <- sum(n$value[rownum[1]:lastnum],na.rm=T)
- n[rownum[1],c("value","returns")] <- c(n$value[lastnum],newreturns)
- }
- # Clustering response series
- if(response.value=="returns"){
- newreturns.rs <- (n$value.1[lastnum]-n$value.1[prevnum])*100/n$value.1[prevnum]
- n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns.rs)
- }else{
- newreturns <- sum(n$value.1[rownum[1]:lastnum],na.rm=T)
- n[rownum[1],c("value.1","returns.1")] <- c(n$value.1[lastnum],newreturns)
- }
- n <- n[-c((rownum[1]+1):lastnum),]
- rownum <- exact.pattern.location(n$both.tails,1,pt.len)
- }
- }
- # Columns to keep
- cn <- c(which.value,"left.tail","right.tail",
- "returns.1","cluster.pattern")
- tmp.ts <- zoo(n[,cn],order.by=as.Date(n$dates))
- colnames(tmp.ts) <- c("event.series","left.tail","right.tail",
- "response.series","cluster.pattern")
-
- # Results
- return(tmp.ts)
-}
-
-##############################
-# Summary statistics functions
-##############################
-#---------------------------------------------
-# Table 1: Summary statistics
-# INPUT: Time series data-set for which
-# summary statistics is to be estimated
-# OUTPUT: A data frame with:
-# - Values: "Minimum", 5%,"25%","Median",
-# "Mean","75%","95%","Maximum",
-# "Standard deviation","IQR",
-# "Observations"
-#----------------------------------------------
-sumstat <- function(input){
- no.var <- NCOL(input)
- if(no.var==1){input <- xts(input)}
- # Creating empty frame: chassis
- tmp <- data.frame(matrix(NA,nrow=11,ncol=NCOL(input)))
- colnames(tmp) <- colnames(input)
- rownames(tmp) <- c("Min","5%","25%","Median","Mean","75%","95%",
- "Max","sd","IQR","Obs.")
- # Estimating summary statistics
- tmp[1,] <- apply(input,2,function(x){min(x,na.rm=TRUE)})
- tmp[2,] <- apply(input,2,function(x){quantile(x,0.05,na.rm=TRUE)})
- tmp[3,] <- apply(input,2,function(x){quantile(x,0.25,na.rm=TRUE)})
- tmp[4,] <- apply(input,2,function(x){median(x,na.rm=TRUE)})
- tmp[5,] <- apply(input,2,function(x){mean(x,na.rm=TRUE)})
- tmp[6,] <- apply(input,2,function(x){quantile(x,0.75,na.rm=TRUE)})
- tmp[7,] <- apply(input,2,function(x){quantile(x,0.95,na.rm=TRUE)})
- tmp[8,] <- apply(input,2,function(x){max(x,na.rm=TRUE)})
- tmp[9,] <- apply(input,2,function(x){sd(x,na.rm=TRUE)})
- tmp[10,] <- apply(input,2,function(x){IQR(x,na.rm=TRUE)})
- tmp[11,] <- apply(input,2,function(x){NROW(x)})
- tmp <- round(tmp,2)
-
- return(tmp)
-}
-
-######################
-# Yearly summary stats
-######################
-#----------------------------
-# INPUT:
-# 'input': Data series for which event cluster distribution
-# is to be calculated;
-# 'prob.value': Probility value for which tail is to be constructed this
-# value is equivalent to one side tail for eg. if prob.value=5
-# then we have values of 5% tail on both sides
-# Functions used: yearly.exevent.summary()
-# OUTPUT:
-# Yearly distribution of extreme events
-#----------------------------
-yearly.exevent.dist <- function(input, prob.value){
- no.var <- NCOL(input)
- mylist <- list()
- # Estimating cluster count
- #--------------------
- # Formatting clusters
- #--------------------
- tmp <- get.clusters.formatted(event.series=input,
- response.series=input,
- probvalue=prob.value,
- event.value="nonreturns",
- response.value="nonreturns")
-
- tmp.res <- yearly.exevent.summary(tmp)
- tmp.res[is.na(tmp.res)] <- 0
- # Left and right tail
- lower.tail.yearly.exevent <- tmp.res[,1:2]
- upper.tail.yearly.exevent <- tmp.res[,3:4]
- output <- list()
- output$lower.tail <- lower.tail.yearly.exevent
- output$upper.tail <- upper.tail.yearly.exevent
- mylist <- output
-
- return(mylist)
-}
-
-#------------------------------------------------
-# Get yearly no. and median for good and bad days
-#------------------------------------------------
-yearly.exevent.summary <- function(tmp){
- tmp.bad <- tmp[which(tmp[,"left.tail"]==1),]
- tmp.good <- tmp[which(tmp[,"right.tail"]==1),]
- # Bad days
- tmp.bad.y <- apply.yearly(xts(tmp.bad),function(x)nrow(x))
- tmp.bad.y <- merge(tmp.bad.y,apply.yearly(xts(tmp.bad[,1]),function(x)median(x,na.rm=T)))
- index(tmp.bad.y) <- as.yearmon(as.Date(substr(index(tmp.bad.y),1,4),"%Y"))
- # Good days
- tmp.good.y <- apply.yearly(xts(tmp.good),function(x)nrow(x))
- tmp.good.y <- merge(tmp.good.y,apply.yearly(xts(tmp.good[,1]),function(x)median(x,na.rm=T)))
- index(tmp.good.y) <- as.yearmon(as.Date(substr(index(tmp.good.y),1,4),"%Y"))
- tmp.res <- merge(tmp.bad.y,tmp.good.y)
- colnames(tmp.res) <- c("number.baddays","median.baddays",
- "number.gooddays","median.goodays")
- output <- as.data.frame(tmp.res)
- cn <- rownames(output)
- rownames(output) <- sapply(rownames(output),
- function(x)substr(x,nchar(x)-3,nchar(x)))
- return(output)
-}
-
-#############################
-# Getting event segregation
-# - clustered and unclustered
-#############################
-#----------------------------
-# INPUT:
-# 'input': Data series for which event cluster distribution
-# is to be calculated;
-# Note: The input series expects the input to be in levels not in returns,
-# if the some the inputs are already in return formats one has to
-# use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-# return series in the data-set
-# 'prob.value': Probility value for which tail is to be constructed this
-# value is equivalent to one side tail for eg. if prob.value=5
-# then we have values of 5% tail on both sides
-# Functions used: get.event.count()
-# OUTPUT:
-# Distribution of extreme events
-#----------------------------
-
-extreme.events.distribution <- function(input,prob.value){
- # Creating an empty frame
- no.var <- NCOL(input)
- lower.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
- upper.tail.dist <- data.frame(matrix(NA,nrow=no.var,ncol=6))
- colnames(lower.tail.dist) <- c("Unclustered","Used clusters",
- "Removed clusters","Total clusters",
- "Total","Total used clusters")
- rownames(lower.tail.dist) <- colnames(input)
- colnames(upper.tail.dist) <- c("Unclustered","Used clusters",
- "Removed clusters","Total clusters",
- "Total","Total used clusters")
- rownames(upper.tail.dist) <- colnames(input)
- # Estimating cluster count
- #--------------
- # Cluster count
- #--------------
- # Non-returns (if it is already in return format)
- tmp <- get.event.count(input,probvalue=prob.value,
- value="nonreturns")
- lower.tail.dist <- tmp[1,]
- upper.tail.dist <- tmp[2,]
-
- #-----------------------------
- # Naming the tail distribution
- #-----------------------------
- mylist <- list(lower.tail.dist,upper.tail.dist)
- names(mylist) <- c("lower.tail", "upper.tail")
- return(mylist)
-}
-
-# Functions used in event count calculation
-get.event.count <- function(series,
- probvalue=5,
- value="returns"){
- # Extracting dataset
- tmp.old <- gen.data(series,probvalue,value)
- tmp <- get.clusters.formatted(event.series=series,
- response.series=series,
- probvalue,
- event.value=value,
- response.value=value)
-
- cp <- tmp[,"cluster.pattern"]
- lvl <- as.numeric(levels(as.factor(cp)))
- lvl.use <- lvl[which(lvl>1)]
- # Calculating Total events
- tot.ev.l <- length(which(tmp.old[,"left.tail"]==1))
- tot.ev.r <- length(which(tmp.old[,"right.tail"]==1))
- # Calculating Unclustered events
- un.clstr.l <- length(which(tmp[,"left.tail"]==1 &
- tmp[,"cluster.pattern"]==1))
- un.clstr.r <- length(which(tmp[,"right.tail"]==1 &
- tmp[,"cluster.pattern"]==1))
- # Calculating Used clusters
- us.cl.l <- us.cl.r <- NULL
- for(i in 1:length(lvl.use)){
- tmp1 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
- tmp[,"left.tail"]==1))*lvl.use[i]
- tmp2 <- length(which(tmp[,"cluster.pattern"]==lvl.use[i] &
- tmp[,"right.tail"]==1))*lvl.use[i]
- us.cl.l <- sum(us.cl.l,tmp1,na.rm=TRUE)
- us.cl.r <- sum(us.cl.r,tmp2,na.rm=TRUE)
- }
-
- # Making a table
- tb <- data.frame(matrix(NA,2,6))
- colnames(tb) <- c("unclstr","used.clstr","removed.clstr","tot.clstr","Tot","Tot.used")
- rownames(tb) <- c("lower","upper")
- tb[,"Tot"] <- c(tot.ev.l,tot.ev.r)
- tb[,"unclstr"] <- c(un.clstr.l,un.clstr.r)
- tb[,"used.clstr"] <- c(us.cl.l,us.cl.r)
- tb[,"Tot.used"] <- tb$unclstr+tb$used.clstr
- tb[,"tot.clstr"] <- tb$Tot-tb$unclstr
- tb[,"removed.clstr"] <- tb$tot.clstr-tb$used.clstr
-
- return(tb)
-}
-
-####################################
-# Quantile values for extreme events
-####################################
-#-----------------------------------
-# INPUT:
-# 'input': Data series in time series format
-# Note: The input series expects the input to be in levels not in returns,
-# if the some the inputs are already in return formats one has to
-# use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-# return series in the data-set
-# Functions used: get.clusters.formatted()
-# OUTPUT:
-# Lower tail and Upper tail quantile values
-#-----------------------------------
-quantile.extreme.values <- function(input, prob.value){
- # Creating an empty frame
- no.var <- NCOL(input)
- lower.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
- upper.tail.qnt.value <- data.frame(matrix(NA,nrow=no.var,ncol=6))
- colnames(lower.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
- "Mean")
- rownames(lower.tail.qnt.value) <- colnames(input)
- colnames(upper.tail.qnt.value) <- c("Min","25%","Median","75%","Max",
- "Mean")
- rownames(upper.tail.qnt.value) <- colnames(input)
- # Estimating cluster count
- #--------------------
- # Formatting clusters
- #--------------------
- tmp <- get.clusters.formatted(event.series=input,
- response.series=input,
- probvalue=prob.value,
- event.value="nonreturns",
- response.value="nonreturns")
-
- # Left tail
- tmp.left.tail <- tmp[which(tmp$left.tail==1),
- "event.series"]
- df.left <- t(data.frame(quantile(tmp.left.tail,c(0,0.25,0.5,0.75,1))))
- tmp.left <- round(cbind(df.left,mean(tmp.left.tail)),2)
- rownames(tmp.left) <- NULL
- colnames(tmp.left) <- c("0%","25%","Median","75%","100%","Mean")
- # Right tail
- tmp.right.tail <- tmp[which(tmp$right.tail==1),
- "event.series"]
- df.right <- t(data.frame(quantile(tmp.right.tail,c(0,0.25,0.5,0.75,1))))
- tmp.right <- round(cbind(df.right,
- mean(tmp.right.tail)),2)
- rownames(tmp.right) <- NULL
- colnames(tmp.right) <- c("0%","25%","Median","75%","100%","Mean")
-
- lower.tail.qnt.value <- tmp.left
- upper.tail.qnt.value <- tmp.right
-
- mylist <- list(lower.tail.qnt.value,upper.tail.qnt.value)
- names(mylist) <- c("lower.tail", "upper.tail")
- return(mylist)
-}
-
-##########################
-# Run length distribution
-##########################
-#-----------------------------------
-# INPUT:
-# 'input': Data series in time series format
-# Note: The input series expects the input to be in levels not in returns,
-# if the some the inputs are already in return formats one has to
-# use the other variable 'already.return.series'
-# 'already.return.series': column name is to be given which already has
-# return series in the data-set
-# Functions used: get.clusters.formatted()
-# get.cluster.distribution()
-# numbers2words()
-# OUTPUT:
-# Lower tail and Upper tail Run length distribution
-#-----------------------------------
-runlength.dist <- function(input, prob.value){
-
- # Creating an empty frame
- no.var <- NCOL(input)
-
- # Finding maximum Run length
- # Seed value
- max.runlength <- 0
- #---------------------------
- # Estimating max. Run length
- #---------------------------
- tmp <- get.clusters.formatted(event.series=input,
- response.series=input,
- probvalue=prob.value,
- event.value="nonreturns",
- response.value="nonreturns")
-
- tmp.runlength <- get.cluster.distribution(tmp,"event.series")
- max.runlength <- max(max.runlength,as.numeric(colnames(tmp.runlength)[NCOL(tmp.runlength)]))
-
- # Generating empty frame
- col.names <- seq(2:max.runlength)+1
- lower.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
- ncol=length(col.names)))
- upper.tail.runlength <- data.frame(matrix(NA,nrow=no.var,
- ncol=length(col.names)))
- colnames(lower.tail.runlength) <- col.names
- rownames(lower.tail.runlength) <- colnames(input)
- colnames(upper.tail.runlength) <- col.names
- rownames(upper.tail.runlength) <- colnames(input)
-
- #----------------------
- # Run length estimation
- #----------------------
- tmp.res <- get.cluster.distribution(tmp,"event.series")
- for(j in 1:length(colnames(tmp.res))){
- col.number <- colnames(tmp.res)[j]
- lower.tail.runlength[1,col.number] <- tmp.res[1,col.number]
- upper.tail.runlength[1,col.number] <- tmp.res[2,col.number]
- }
-
- # Replacing NA's with zeroes
- lower.tail.runlength[is.na(lower.tail.runlength)] <- 0
- upper.tail.runlength[is.na(upper.tail.runlength)] <- 0
-
- # creating column names
- word.cn <- NULL
- for(i in 1:length(col.names)){
- word.cn[i] <- numbers2words(col.names[i])
- }
- colnames(lower.tail.runlength) <- word.cn
- colnames(upper.tail.runlength) <- word.cn
- mylist <- list(lower.tail.runlength,upper.tail.runlength)
- names(mylist) <- c("lower.tail", "upper.tail")
- return(mylist)
-}
-
-#-------------------------
-# Get cluster distribution
-#-------------------------
-# Input for this function is the output of get.cluster.formatted
-get.cluster.distribution <- function(tmp,variable){
- # Extract cluster category
- cp <- tmp[,"cluster.pattern"]
- lvl <- as.numeric(levels(as.factor(cp)))
- lvl.use <- lvl[which(lvl>1)]
- # Get numbers for each category
- tb <- data.frame(matrix(NA,2,length(lvl.use)))
- colnames(tb) <- as.character(lvl.use)
- rownames(tb) <- c(paste(variable,":lower tail"),
- paste(variable,":upper tail"))
- for(i in 1:length(lvl.use)){
- tb[1,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
- & tmp[,"left.tail"]==1))
- tb[2,i] <- length(which(tmp[,"cluster.pattern"]==lvl.use[i]
- & tmp[,"right.tail"]==1))
-
- }
- return(tb)
-}
-
-#----------------------------
-# Converting numbers to words
-#----------------------------
-numbers2words <- function(x){
- helper <- function(x){
- digits <- rev(strsplit(as.character(x), "")[[1]])
- nDigits <- length(digits)
- if (nDigits == 1) as.vector(ones[digits])
- else if (nDigits == 2)
- if (x <= 19) as.vector(teens[digits[1]])
- else trim(paste(tens[digits[2]],
- Recall(as.numeric(digits[1]))))
- else if (nDigits == 3) trim(paste(ones[digits[3]], "hundred",
- Recall(makeNumber(digits[2:1]))))
- else {
- nSuffix <- ((nDigits + 2) %/% 3) - 1
- if (nSuffix > length(suffixes)) stop(paste(x, "is too large!"))
- trim(paste(Recall(makeNumber(digits[
- nDigits:(3*nSuffix + 1)])),
- suffixes[nSuffix],
- Recall(makeNumber(digits[(3*nSuffix):1]))))
- }
- }
- trim <- function(text){
- gsub("^\ ", "", gsub("\ *$", "", text))
- }
- makeNumber <- function(...) as.numeric(paste(..., collapse=""))
- opts <- options(scipen=100)
- on.exit(options(opts))
- ones <- c("", "one", "two", "three", "four", "five", "six", "seven",
- "eight", "nine")
- names(ones) <- 0:9
- teens <- c("ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
- "sixteen", " seventeen", "eighteen", "nineteen")
- names(teens) <- 0:9
- tens <- c("twenty", "thirty", "forty", "fifty", "sixty", "seventy",
- "eighty",
- "ninety")
- names(tens) <- 2:9
- x <- round(x)
- suffixes <- c("thousand", "million", "billion", "trillion")
- if (length(x) > 1) return(sapply(x, helper))
- helper(x)
-}
Copied: pkg/R/identifyextremeevents.R (from rev 33, pkg/R/identify.extreme.events.R)
===================================================================
--- pkg/R/identifyextremeevents.R (rev 0)
+++ pkg/R/identifyextremeevents.R 2013-02-11 12:19:25 UTC (rev 35)
@@ -0,0 +1,737 @@
+
+# Total 16 functions
+############################
+# Identifying extreme events
+############################
+# libraries required
+library(xts)
+#----------------------------------------------------------------
+# INPUT:
+# 'input' : Data series for which extreme events are
+# to be identified. More than one series
+# is permissble. The 'input' should be in time
+# series format.
+# 'prob.value': This is the tail value for which event is
+# to be defined. For eg: prob.value=5 will
+# consider 5% tail on both sides
+#-----------------------------------------------------------------
+# OUTPUT:
+# Result will be in a list of 3 with following tables:
+# 1. Summary statistics
+# a. Summary of whole data-set
+# 2. Lower tail: Extreme event tables
+# a. Distribution of extreme events
+# b. Run length distribution
+# c. Quantile values
+# d. Yearly distribution
+# e. Extreme event data
+# - Clustered, Un-clustered and Both
+# 3. Upper tail: Extreme event tables
+# a. Distribution of extreme events
+# b. Run length distribution
+# c. Quantile values
+# d. Yearly distribution
+# e. Extreme event data
+# - Clustered, Un-clustered and Both
+#------------------------------------------------------------------
+# NOTE:
+identifyextremeevents <- function(input,prob.value){
+ no.var <- NCOL(input)
+
+ #------------------------------------------------
+ # Breaking the function if any input is not given
+ #------------------------------------------------
+ # For one variable
+ # If class of data is not time series
+ class.input <- class(input)%in%c("xts","zoo")
+ if(class.input==FALSE){
+ stop("Input data is not in time series format. Valid 'input' should be of class xts and zoo")
+ }
+
+ # Converting an xts object to zoo series
+ input.class <- length(which(class(input)%in%"xts"))
+ if(length(input.class)==1){
+ input <- zoo(input)
+ }
+
+ #-----------------------------------------
+ # Event series: Clustered and un-clustered
+ #-----------------------------------------
+ tmp <- get.clusters.formatted(event.series=input,
+ response.series=input,
+ probvalue=prob.value,
+ event.value="nonreturns",
+ response.value="nonreturns")
+ tail.events <- tmp[which(tmp$left.tail==1 | tmp$right.tail==1),]
+ clustered.tail.events <- tmp[which(tmp$cluster.pattern>1),]
+ unclustered.tail.events <- tmp[-which(tmp$cluster.pattern>1),]
+ # Left tail data
+ left.tail.clustered <- clustered.tail.events[which(clustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ left.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ left.all <- tail.events[which(tail.events$left.tail==1),c("event.series","cluster.pattern")]
+ # Right tail data
+ right.tail.clustered <- clustered.tail.events[which(clustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+ right.tail.unclustered <- unclustered.tail.events[which(unclustered.tail.events$right.tail==1),c("event.series","cluster.pattern")]
+ right.all <- tail.events[which(tail.events$right.tail==1),c("event.series","cluster.pattern")]
+
+ #---------------------
+ # Extreme event output
+ #---------------------
+ # Summary statistics
+ summ.st <- sumstat(input)
+
+ # Distribtution of events
+ event.dist <- extreme.events.distribution(input,prob.value)
+
+ # Run length distribution
+ runlength <- runlength.dist(input,prob.value)
+
+ # Quantile extreme values
+ qnt.values <- quantile.extreme.values(input,prob.value)
+
+ # Yearly distribution of extreme event dates
+ yearly.exevent <- yearly.exevent.dist(input,prob.value)
+
+ #---------------------
+ # Compiling the output
+ #---------------------
+ output <- lower.tail <- upper.tail <- list()
+ # Compiling lower tail and upper tail separately
+ # Lower tail
+ lower.tail$data <- list(left.all,left.tail.clustered,
+ left.tail.unclustered)
+ names(lower.tail$data) <- c("All","Clustered","Un-clustered")
+ lower.tail$extreme.event.distribution <- event.dist$lower.tail
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/eventstudies -r 35
More information about the Eventstudies-commits
mailing list