[Blotter-commits] r1120 - pkg/FinancialInstrument/sandbox
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Sun Aug 5 19:09:37 CEST 2012
Author: braverock
Date: 2012-08-05 19:09:37 +0200 (Sun, 05 Aug 2012)
New Revision: 1120
Added:
pkg/FinancialInstrument/sandbox/calcHedgeRatio.R
Log:
- initial version of hedge ratio wrapper, not done enough for main repository yet
Added: pkg/FinancialInstrument/sandbox/calcHedgeRatio.R
===================================================================
--- pkg/FinancialInstrument/sandbox/calcHedgeRatio.R (rev 0)
+++ pkg/FinancialInstrument/sandbox/calcHedgeRatio.R 2012-08-05 17:09:37 UTC (rev 1120)
@@ -0,0 +1,323 @@
+
+minSpreadVarWeights <- function(series,var.method='var',interval=c(-10,10)) {
+ #returns weight of 1st symbol. Weight of 2nd Symbol = (1-weight of prc1)
+ #if (length(symbols) < 2) stop("You must provide a list containing at least 2 instrument names")
+ #if (length(symbols) > 2) warning("only using 1st 2 symbols")
+ ab.prc<-series
+ if (length(ab.prc) > 0) {
+ if (var.method == 'sd') {
+ f <- function(x) {
+ sd(x * ab.prc[,1] - ((1-x) * ab.prc[,2]))
+ }
+ } else {
+ f <- function(x) {
+ var(x * ab.prc[,1] - ((1-x) * ab.prc[,2]))
+ }
+ }
+ tmp <- optimize(f, interval)$minimum
+ if(tmp < -10 || tmp > 10) warning('solution at extreme, use larger interval')
+
+ #tmp <- nlm(f,guess)$estimate
+ out <- c(tmp, 1-tmp)
+ } else {
+ out <- NA
+ }
+ out
+}
+
+
+#minSpreadVarRatio: solve for weights where variance of ( w1*p1 - (1-w2)*p2 ) is at a minimum
+m1 <- minSpreadVarRatio <- function(series,var.method='var',interval=c(-10,10)) {
+ #Returns number of 2nd symbol to short for 1 long in 1st symbol
+ w <- minSpreadVarWeights(series,var.method=var.method,interval=interval)
+ w[2]/w[1]
+}
+
+#stdVarRatio: qty of instrument 2 that you need for long 1 in instrument 1 = ((p1*var(p1))/(p2*var(p2)))
+stdVarRatio <- function(priceSeries,retSeries,use.price='last',var.method='var') {
+ #returns weight of 1st symbol. Weight of 2nd Symbol = (1-weight of prc1)
+ ab.prc<-priceSeries
+ ab.ret<-retSeries
+ if (length(ab.prc) > 0) {
+ names(ab.prc) <- c('prc1','prc2')
+
+ #if (use == 'price' | use =='Price') ab.ret <- ab.prc
+ #else ab.ret <- diff(log(ab.prc))[-1]
+ #returns qty of second instrument you should short for each unit of 1st instrument
+ #implicicly assumes 100% correlation
+ #I use var below because it's faster, but maybe sd is better
+ if (var.method=='sd') {
+ if (use.price=='first') tmp <- as.numeric((first(ab.prc[,1]) * sd(ab.ret[,1]))/(first(ab.prc[,2]) * sd(ab.ret[,2])))
+ else tmp <- as.numeric((last(ab.prc[,1]) * sd(ab.ret[,1]))/(last(ab.prc[,2]) * sd(ab.ret[,2])))
+ } else {
+ if (use.price=='first') tmp <- as.numeric((first(ab.prc[,1]) * var(ab.ret[,1]))/(first(ab.prc[,2]) * var(ab.ret[,2])))
+ else tmp <- as.numeric((last(ab.prc[,1]) * var(ab.ret[,1]))/(last(ab.prc[,2]) * var(ab.ret[,2])))
+ }
+ } else tmp <- NA
+ tmp
+}
+m2 <- stdPriceVarRatio <- function(priceSeries,retSeries,use.price='last',var.method='var')
+ stdVarRatio(priceSeries=priceSeries,retSeries=retSeries,use.price=use.price,var.method='var')
+m3 <- stdReturnVarRatio <- function(priceSeries,retSeries,use.price='last',var.method='var')
+ stdVarRatio(priceSeries=priceSeries,retSeries=retSeries,use.price=use.price,var.method='var')
+m4 <- stdPriceStdevRatio <- function(priceSeries,retSeries,use.price='last',var.method='sd')
+ stdVarRatio(priceSeries=priceSeries,retSeries=retSeries,use.price=use.price,var.method='sd')
+m5 <- stdReturnStdevRatio <- function(priceSeries,retSeries,use.price='last',var.method='sd')
+ stdVarRatio(priceSeries=priceSeries,retSeries=retSeries,use.price=use.price,var.method='sd')
+
+
+#stdVarRatio(c('CO_4','QS_7'))
+
+#beta (price,or return): regress instrument 2 on instrument 1. qty of instrument 2 that you need = coefficient of correlation
+betaRatio <- function(series) {
+ ab.prc <- series
+ #merge(prc1,prc2,all=F)
+ if (length(ab.prc) > 0) {
+
+ # if (use=='return' | use=='ret') ab.prc <- diff(log(ab.prc),na.pad=FALSE)
+ # if (use=='change' | use=='chg') ab.prc <- diff(ab.prc,na.pad=FALSE)
+ if (length(ab.prc[!any(is.nan(ab.prc))]) > 0) {
+ names(ab.prc) <- c('prc1','prc2')
+ m <- lm(prc1 ~ prc2 + 0, data=ab.prc)
+ tmp <- as.numeric(coef(m)[1])
+ #equivalently:
+ #tmp <- as.numeric(cov(ab.prc[,1],ab.prc[,2])/var(ab.prc[,2]))
+ } else tmp <- NA
+ } else tmp <- NA
+ tmp
+}
+regPriceRatio <- function(series) betaRatio(series,use=use) #==?
+regPrcChgRatio <- function(series) betaRatio(series,use=use) #==onChg
+regReturnRatio <- function(series) betaRatio(series,use=use) #==onLogRets
+
+
+pcaRatio <- function(series) {
+ ab.prc <- series
+
+ if (length(ab.prc[!any(is.nan(ab.prc))]) > 0) {
+
+ names(ab.prc) <- c('prc1','prc2')
+ r <- princomp(ab.prc)
+ tmp <- r$loadings[2,1] / r$loadings[1,1] #slope
+ } else tmp <- NA
+ tmp
+}
+
+
+#equalizedRatio: solve for ratio that makes price of spread at first time equal to price of spread at last time.
+equalizedRatio <- function(series) {
+ ab.prc <- series
+
+ if (length(ab.prc) > 0) {
+ names(ab.prc) <- c('prc1','prc2')
+
+ tmp <- (as.numeric(first(ab.prc[,1]))-as.numeric(last(ab.prc[,1])))/
+ (as.numeric(first(ab.prc[,2]))-as.numeric(last(ab.prc[,2])))
+ } else tmp <- NA
+ tmp
+}
+
+MADratio<-function(barData){
+ #prc<-getPrice(rawdata)
+ #ohlcprc<-to.period(rawdata,on=minutes',k=10)
+ #ohlcprc$range<-abs(Hi(ohlcprc)-Lo(ohlcprc))
+ colnames(barData)<-c("O1","H1","L1","C1","O2","H2","L2","C2")
+ barData$Range1=abs(barData$H1-barData$L1)
+ barData$Range2=abs(barData$H2-barData$L2)
+ Ratio=mean(barData$Range1)/mean(barData$Range2)
+ Ratio
+}
+
+covRatio <-function(series) {
+ xtmp<-cov(series)
+ covRat<-sqrt(xtmp[1,1])/sqrt(xtmp[2,2])
+ covRat
+}
+
+#' The calcHedgeRatio function takes in data on symbols, a vector of choices of methods to calculate hedge ratios, and a price.method ('price','return', or 'diff').
+#' Some other arguments include the on and k arguments, with on taking an argument of a certain frequency ('microseconds','milliseconds','seconds',minutes',etc.) and k stating how many units of the on argument pass between each print.
+#' The type of price the time series data will use gets subset by prefer.price.
+#' Use.price determines whether open ('first') prices are used, or anything else (defaults at 'last')
+#' Interval is for the minVar/minSd functions only.
+#' from and to set the date
+#' these are the different hedge ratio options:
+#' minVar/minSd: spread = [weight(A) * price.method(A) * multiplier(A)] - [(1-weight(A)) * price.method(B) * multiplier(B) ]
+#' solve for the value of weight(A) that minimizes the variance/sd of spread. hedgeRatio=weight(B)/weight(A)
+#' 'stdVar'/'stdDev'
+#' calculate the variances of price.method(A) and price.method(B) and set equal to var(A) and var(B)
+#' hedgeRatio = [last(price.method(A)) * var(A) * multiplier(A)] / [last(price.method(B)) * var(B) * multiplier(B)]
+#' where last(price.method(A)) and last(price.method(B)) are the most recent price.methods of A and B.
+#' stdDev is identical, except with var being sd
+#' 'betaRatio'
+#' regress price.method of B on price.method of A. hedgeRatio is the coefficient
+#' set up model like this price.method(A) ~ price.method(B) + 0
+#' pcaRatio--ratio of principal component price methods
+#' equalizedRatio--ratio that has sread values be identical on the first and last days of the sample. Performance drops on out of sample data. Use with caution.
+#' MADRatio--Mean Absolute Deviation--a ratio of ranges of the instruments for the specified time frame.
+#' covRatio--computes the ratio of standard deviations
+#'
+#' @param symbols
+#' @param methods
+#' @param interval
+#' @param use.price
+#' @param price.method
+#' @param prefer.price
+#' @param on period to calculate on, see \code{\link{endpoints}}
+#' @param k number of sub periods to aggregate on
+#' @param from Retrieve data no earlier than this date.
+#' @param to Retrieve data through this date
+#' @param session xts style ISO-8601 time subsetting for intraday sessions
+#' @param \dots any other passthru parameters
+#' @author Ilya Kipnis, Garrett See, Brian G. Peterson
+#' @export
+calcHedgeRatio<-function(symbols,
+ methods=c("minVar","minSd","stdVar","stdDev","betaRatio","pcaRatio","equal","MADratio","covRatio"),
+ # methods=c("minVar"),
+ interval=c(-10,10),
+ use.price='last',
+ price.method='price',
+ prefer.price=NULL,
+ on="minutes",k=1,
+ from='1970-01-01',
+ to=Sys.Date()-1,
+ session=NULL,
+ ...
+)
+{
+
+ if (length(symbols) < 2) stop("You must provide a list containing at least 2 instrument names")
+ if (length(symbols) > 2) warning("only using 1st 2 symbols. patches welcome.")
+
+ mult <- NULL
+ for (Symbol in symbols) {
+ tmp_instr <- try(getInstrument(Symbol))
+ if (inherits(tmp_instr, "try-error") | !is.instrument(tmp_instr)) {
+ warning(paste("Instrument", Symbol, " not found, using contract multiplier of 1"))
+ mult <- c(mult,1)
+ } else {
+ mult <- c(mult,tmp_instr$multiplier)
+ }
+ }
+
+ prc1 <- getSymbols(symbols[1], from=from, to=to, auto.assign=FALSE, ...=...)
+ prc2 <- getSymbols(symbols[2], from=from, to=to, auto.assign=FALSE, ...=...)
+
+ if(is.null(prefer.price)){
+ if(is.BBO(prc1)){
+ prc1<-getPrice(prc1,NULL,prefer="Bid")
+ prc2<-getPrice(prc2,NULL,prefer="Bid")
+ } else {
+ prc1<-getPrice(prc1,NULL,prefer="Ad")
+ prc2<-getPrice(prc2,NULL,prefer="Ad")
+ }
+ } else {
+ prc1<-getPrice(prc1,NULL,prefer=prefer.price)
+ prc2<-getPrice(prc2,NULL,prefer=prefer.price)
+ }
+
+ DF<-cbind(prc1,prc2)
+
+ fn_split_highFreq <- function(DF,session=NULL)
+ {
+ DF.split <- split(DF,"days")
+ ret <- NULL
+
+ for(d in 1:length(DF.split))
+ {
+ tmp <- na.locf(DF.split[[d]])
+ tmp <- na.omit(tmp)
+ ret <- rbind(ret,tmp)
+ }
+ #attr(attr(ret,"index"),"tzone") <- "GMT" # no longer needed?
+ #attr(ret,".indexTZ") <- "GMT" # no longer needed?
+ colnames(ret) <- colnames(DF)
+ if(!is.null(session)) ret<-ret[session]
+ ret
+ }
+
+ fn_split_daily <- function(DF)
+ {
+ ret<-NULL
+ tmp<-na.locf(DF)
+ tmp<-na.omit(tmp)
+ ret<-tmp
+ colnames(ret)<-colnames(DF)
+ ret
+ }
+
+ if(median(diff(.index(prc1)))<86400){
+ series<-fn_split_highFreq(DF,session)
+ } else {
+ series<-fn_split_daily(DF)
+ }
+
+ #notionalize the series
+ series[,1]<-series[,1]*mult[1]
+ series[,2]<-series[,2]*mult[2]
+
+ #TODO FIXME convert one series if the currencies aren't the same
+
+ OHLCprc1<-to.period(series[,1],period=on,k=k)
+ OHLCprc2<-to.period(series[,2],period=on,k=k)
+ barData<-cbind(OHLCprc1,OHLCprc2)
+
+ series<-series[endpoints(series,on=on,k=k),]
+
+ priceSeries<-series #for use with stdVarRatio
+
+ if(length(price.method)!=1){stop("Your price method must be of length one.")}
+ if(price.method=='price'|price.method=='Price'|price.method=='prc'){
+ series<-series #do nothing
+ barData<-barData
+ } else if(price.method=='return'|price.method=='Return'|price.method=='ret'){
+ series<-na.omit(diff(log(series)))
+ barData<-na.omit(diff(log(barData)))
+ } else {
+ series<-na.omit(diff(series))
+ barData<-na.omit(diff(barData))
+ }
+
+ if(length(methods)<=0) stop("You must provide at least one method")
+
+ methodSwitch<-function(series,method){
+ switch(method,
+ minVar = minSpreadVarWeights(series=series,var.method='var',interval=interval),
+ minSd = minSpreadVarWeights(series=series,var.method='sd',interval=interval),
+ stdVar = stdVarRatio(priceSeries=priceSeries,retSeries=series,use.price=use.price,var.method='var'),
+ stdDev = stdVarRatio(priceSeries=priceSeries,retSeries=series,use.price=use.price,var.method='sd'),
+ betaRatio = betaRatio(series),
+ pcaRatio = pcaRatio(series),
+ equal = equalizedRatio(series),
+ MADratio = MADratio(barData),
+ covRatio = covRatio(series)
+ )
+ }
+
+ ansRow<-NULL
+ ansColNames<-c()
+ for(method in methods){
+ ansColNames<-c(ansColNames,method)
+ ansRow<-cbind(ansRow,methodSwitch(series,method))
+ }
+ ansRow<-as.data.frame(ansRow)
+ colnames(ansRow)<-ansColNames
+
+ if(length(which(ansColNames=="minVar")>0)){ #is the minVar method in there?
+ ansRow$minVar<-ansRow$minVar/ansRow$minVar[1] #if so, set it to 1 and HR
+ }
+
+ if(length(which(ansColNames=="equal")>0)){ #same deal with equal
+ ansRow$equal[1]=1
+ ansRow$equal[2]=ansRow$equal[2]+1
+ }
+
+ for(i in 1:length(ansRow[1,])){ #the rest just give the same output twice
+ ansRow[1,i]=1
+ }
+
+ ansRow<-cbind(symbols,ansRow)
+ ansRow$symbols<-as.character(ansRow$symbols)
+
+ return(ansRow)
+
+}
\ No newline at end of file
More information about the Blotter-commits
mailing list