[Blotter-commits] r907 - pkg/FinancialInstrument/inst/parser

Fri Jan 6 22:51:43 CET 2012

Author: gsee
Date: 2012-01-06 22:51:43 +0100 (Fri, 06 Jan 2012)
New Revision: 907

Modified:
   pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R
Log:
use .TRTH envir directly instead of attaching/detaching

Modified: pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R
===================================================================

--- pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R	2012-01-05 16:48:42 UTC (rev 906)
+++ pkg/FinancialInstrument/inst/parser/TRTH_BackFill.R	2012-01-06 21:51:43 UTC (rev 907)
@@ -54,7 +54,7 @@
 #    username = 'email at domain.com',
 #    password = 'password',
 #    default_type = 'guaranteed_spread',
-#    default_currecy = "USD",
+#    default_currency = "USD",
 #    job.name = "ReutersJobName",
 #    overwrite = FALSE,
 #    tick.image = TRUE,
@@ -70,7 +70,9 @@
 
 configureTRTH <- function(config.file, path.output='~/TRTH/', ...) {
     ## Create environment to hold variables that more than one function needs to access    
-    .TRTH <- new.env(parent=.GlobalEnv)
+    if (!exists('.TRTH', .GlobalEnv)) {
+        .TRTH <- new.env(parent=.GlobalEnv)
+    } else .TRTH <- get('.TRTH', pos=.GlobalEnv)
     dargs <- list(...)
 
     ## Load required packages
@@ -96,6 +98,7 @@
     # Some things (subdirectory names) we will create if they aren't in dots or config_file
     pickDirArg <- function(x) {
         if (!is.null(dargs[[x]])) return(dargs[[x]]) #passed through dots
+        if (!is.null(.TRTH[[x]])) return(.TRTH[[x]])        
         if (exists(x)) return(addslash(get(x)))
         addslash(paste(path.output, sub("_dir", "", x), sep=""))
     }
@@ -120,6 +123,7 @@
         # otherwise, if it was in config_file, use that
         # if it's neither in dots, nor in config_file, use default
         if (!is.null(dargs[[x]])) return(dargs[[x]]) #passed through dots
+        if (!is.null(.TRTH[[x]])) return(.TRTH[[x]])
         if (exists(x)) return(get(x))
         default
     }
@@ -158,10 +162,10 @@
         .TRTH <- try(get('.TRTH', pos=.GlobalEnv))
         if (inherits(.TRTH, 'try-error')) stop("Run configureTRTH function first")
     }
-    attach(.TRTH)
+#    attach(.TRTH)
     Sys.umask("0002")
 
-    Archive.output <- list.files(archive_dir)
+    Archive.output <- list.files(.TRTH$archive_dir)
     Archive.output <- Archive.output[grep("\\.gz",Archive.output)]
     omit <- c(grep("confirmation",Archive.output),grep("report",Archive.output))
     if (length(omit) > 0) Archive.output <- Archive.output[-omit]
@@ -171,7 +175,7 @@
     {
         clear <- warnings() #currency loads from oanda alway generate warnings, clear them out
         Reuters <- system(paste("curl ftp://tickhistory-ftp.thomsonreuters.com:15500/results/ -u ",
-                                username,":",password," --ftp-ssl -k -l",sep=""),intern=TRUE)
+                                .TRTH$username,":",.TRTH$password," --ftp-ssl -k -l",sep=""),intern=TRUE)
         cat("\n")
         w <- ''
         w <- warnings()[!warnings() %in% clear]
@@ -189,19 +193,18 @@
     Reuters.report <- Reuters[grep("report",Reuters)]
 
     Reuters.output <-  Reuters[-c(grep("report",Reuters),grep("confirmation",Reuters))]
-    Reuters.output <-  Reuters.output[grep(job.name, Reuters.output)]
+    Reuters.output <-  Reuters.output[grep(.TRTH$job.name, Reuters.output)]
 
     files.gz <- Reuters.output[!(Reuters.output %in% Archive.output)]
     #files.gz <- paste(username, "-", job.name, ".csv.gz", sep="")
     
-    if (length(files.gz) == 0) files.gz <- Reuters.output
-    if (length(files.gz) == 0) stop('Cannot find .gz file containing "job.name" Maybe it has already been purged?')
-    .TRTH$files.gz = files.gz
+    if (length(.TRTH$files.gz) == 0) .TRTH$files.gz <- Reuters.output
+    if (length(.TRTH$files.gz) == 0) stop('Cannot find .gz file containing "job.name" Maybe it has already been purged?')
     assign(".TRTH", .TRTH, pos=.GlobalEnv)
     
-    for(i in 1:length(files.gz))
+    for(i in 1:length(.TRTH$files.gz))
     {	
-        filename.gz <- files.gz[i]
+        filename.gz <- .TRTH$files.gz[i]
         filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
 	
         alias <- unlist(strsplit(filename.gz,"-"))[3]
@@ -213,9 +216,9 @@
         Reuters2 <- 0
         while(!fileflag) #try to download individual files
         {
-            if (!file.exists(paste(archive_dir, filename.gz, sep="")) || overwrite) {
+            if (!file.exists(paste(.TRTH$archive_dir, filename.gz, sep="")) || .TRTH$overwrite) {
                 Reuters2 <- system(paste("curl -m 10800 --max-filesize 1610612736 ftp://tickhistory-ftp.thomsonreuters.com:15500/results/", 
-                                    filename.gz, " -u ", username, ":", password, " --ssl -k > ", archive_dir, filename.gz, sep=""))
+                                    filename.gz, " -u ", .TRTH$username, ":", .TRTH$password, " --ssl -k > ", .TRTH$archive_dir, filename.gz, sep=""))
             } #else cat(paste(filename.gz, 'already exists, and overwrite==FALSE; not re-downloading.'), "\n")
             if(Reuters2 != 0)
             {
@@ -230,19 +233,20 @@
         }
         
 	    ## Download Report s
-        if (!file.exists(paste(archive_dir, Reuters.report[grep(alias,Reuters.report)], sep="")) || overwrite) { 
+        if (!file.exists(paste(.TRTH$archive_dir, Reuters.report[grep(alias,Reuters.report)], sep="")) || .TRTH$overwrite) { 
     	    system(paste("curl ftp://tickhistory-ftp.thomsonreuters.com:15500/results/",
-                        Reuters.report[grep(alias,Reuters.report)], " -u ", username, ":", password,
-                        " --ftp-ssl -k > ", archive_dir, Reuters.report[grep(alias,Reuters.report)], sep=""))
+                        Reuters.report[grep(alias,Reuters.report)], " -u ", .TRTH$username, ":", .TRTH$password,
+                        " --ftp-ssl -k > ", .TRTH$archive_dir, Reuters.report[grep(alias,Reuters.report)], sep=""))
 	        #system(paste("gzip -d -f ",archive_dir,"Report/",Reuters.report[grep(alias,Reuters.report)],sep=""))
 	        cat("\n")
         } #else cat(paste(Reuters.report[grep(alias,Reuters.report)], 
           #      "already exists, and overwrite==FALSE; not re-downloading.\n"))
     }
 
-    #save(files.gz, file=paste(archive_dir, 'files.gz.tmp.rda', sep=""))
+    #save(files.gz, file=paste(.TRTH$archive_dir, 'files.gz.tmp.rda', sep=""))
     #files.gz
-    detach(.TRTH)
+#    detach(.TRTH)
+    assign(".TRTH", .TRTH, pos=.GlobalEnv)
     .TRTH
 }
 
@@ -265,40 +269,40 @@
 splitCSV <- function(.TRTH) {
     #FIXME: respect overwrite argument
     if (missing(.TRTH) && !exists(".TRTH")) stop("Run configureTRTH function first")
-    attach(.TRTH)
-    if (substr(path.output, nchar(path.output), nchar(path.output)) != "/") {
-        .TRTH$path.output <- path.output <- paste(path.output, "/", sep="")
+#    attach(.TRTH)
+    if (substr(.TRTH$path.output, nchar(.TRTH$path.output), nchar(.TRTH$path.output)) != "/") {
+        .TRTH$path.output <- paste(.TRTH$path.output, "/", sep="")
     }
 
-    if (!exists('files.gz')) .TRTH$files.gz <- files.gz <- get_files.gz(archive_dir, job.name)
+    if (is.null(.TRTH$files.gz)) .TRTH$files.gz <- get_files.gz(.TRTH$archive_dir, .TRTH$job.name)
 
-    if (!exists('instrument_file')) { #Don't need this anymore
-        tmp <- list.files(paste(path.output))
-        instrument_file <- paste(path.output, tail(tmp[grep("instruments", tmp)], 1), sep="")
+    if (is.null(.TRTH$instrument_file)) { #Don't need this anymore
+        tmp <- list.files(paste(.TRTH$path.output))
+        instrument_file <- paste(.TRTH$path.output, tail(tmp[grep("instruments", tmp)], 1), sep="")
         if (!file.exists(instrument_file)) {
             stop("Could not find instrument_file; please specify")
         } else .TRTH$instrument_file <- instrument_file
     }
 
-    loadInstruments(instrument_file)
-    registerDoMC(no.cores)
+    loadInstruments(.TRTH$instrument_file)
+    registerDoMC(.TRTH$no.cores)
 
     ## unzip and split (new unzip method does not require rezip; keeps original gz file)
-    setwd(archive_dir)
+    setwd(.TRTH$archive_dir)
 
-    foreach(i = 1:length(files.gz)) %dopar% 
+    foreach(i = 1:length(.TRTH$files.gz)) %dopar% 
     { # unzip in parallel
-        filename.gz <- files.gz[i]
+        filename.gz <- .TRTH$files.gz[i]
         filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
 	    #unzip the file
 	    print(paste("unzipping ",filename.gz, sep=""))
         #system(paste("gzip -d -f ",archive_dir,filename.gz,sep=""))
-        system(paste("gunzip -f < ", archive_dir, filename.gz, " > ", archive_dir, filename.csv, sep=""))
+        system(paste("gunzip -f < ", .TRTH$archive_dir, filename.gz, " > ", .TRTH$archive_dir, filename.csv, sep=""))
     }
 
-    for (i in 1:length(files.gz)) 
+    for (i in 1:length(.TRTH$files.gz)) 
     {
-        filename.gz <- files.gz[i]
+        filename.gz <- .TRTH$files.gz[i]
         filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
         # Use awk to split the big CSV into daily CSVs.  Each CSV will have a single
         # row which we will then overwrite with the column headers.  Then we'll
@@ -310,7 +314,7 @@
         system(paste('awk -v f2="" -F "," '," '",'{f1 = $1"."$2".csv";if(f1 != f2) { print >> f1; close(f2); f2=f1; } }',"' ",filename.csv, sep=""))
     }
 
-    tmpfiles <- list.files(archive_dir)
+    tmpfiles <- list.files(.TRTH$archive_dir)
     files.header <- tmpfiles[grep("RIC",tmpfiles)]        
     big.files <- tmpfiles[grep("@", tmpfiles)] #Big zipped CSVs from Reuters have e-mail address in name
     #big.files <- tmpfiles[grep(job.name, tmpfiles)]
@@ -323,22 +327,21 @@
                 tmpfiles[grep("missing_instruments", tmpfiles)]               
                 )
 
-    files.csv <- tmpfiles[!tmpfiles %in% ignore]
-    .TRTH$files.csv <- files.csv
+    .TRTH$files.csv <- tmpfiles[!tmpfiles %in% ignore]
     # files.header now has several identical rows. Delete all but first by extracting first row, and overwritting file with it
     system(paste('head -1 "', files.header, '" > header.csv', sep="")) # extract 1st line
     # head -1 "RIC.Date[G].csv" > header.csv
     system(paste('mv header.csv "', files.header, '"', sep=""))        # replace files.header with only 1st line
     # mv header.csv "RIC.Date[G].csv"
 
-    for (fl in files.csv) {
-        system(paste('cp "', files.header, '" ', paste(archive_dir, fl, sep=""), sep=""))
+    for (fl in .TRTH$files.csv) {
+        system(paste('cp "', files.header, '" ', paste(.TRTH$archive_dir, fl, sep=""), sep=""))
         #cp "#RIC.Date[G].csv" /home/garrett/TRTH/archive/GEM1-U1.01-APR-2008.csv
     }
 
-    for (j in 1:length(files.gz))
+    for (j in 1:length(.TRTH$files.gz))
     {   
-        filename.gz <- files.gz[j]
+        filename.gz <- .TRTH$files.gz[j]
         filename.csv <- substr(filename.gz,1,(nchar(filename.gz)-3))
 
 	    ## Split the Files 
@@ -353,9 +356,9 @@
 	    print(paste('Done splitting ', filename.csv, sep=""))
 
         # remove header file
-        invisible(file.remove(paste(archive_dir, files.header, sep="")))
+        invisible(file.remove(paste(.TRTH$archive_dir, files.header, sep="")))
         # remove unzipped csv
-        invisible(file.remove(paste(archive_dir, filename.csv, sep="")))
+        invisible(file.remove(paste(.TRTH$archive_dir, filename.csv, sep="")))
 	    ## Zip the File
         # print(paste("zipping ",filename.csv,sep=""))
         # system(paste("gzip -f ",archive_dir,filename.csv,sep=""))
@@ -366,11 +369,11 @@
 
     # Move split CSVs into csv_dir
     files.xts <- NULL
-#    foreach (k = icount(length(files.csv))) %dopar%
-    for (k in 1:length(files.csv))
+#    foreach (k = icount(length(.TRTH$files.csv))) %dopar%
+    for (k in 1:length(.TRTH$files.csv))
     {
         #print(k)
-        name.csv <- files.csv[k]                        # "ASBC.O.08-JAN-2011.csv"
+        name.csv <- .TRTH$files.csv[k]                        # "ASBC.O.08-JAN-2011.csv"
         #name <- unlist(strsplit(name.csv,".",fixed=TRUE))[1]
         spl.name <- unlist(strsplit(name.csv, "\\."))   # "ASBC" "O" "08-JAN-2011" "csv" 
         last2 <- (length(spl.name) - 1):length(spl.name)# 3 4
@@ -385,11 +388,11 @@
         } else make.names(name)
 
         ## Create directory if it does not exist
-        dir.create(paste(csv_dir, date.format, "/", sep=""), showWarnings=FALSE, recursive=TRUE, mode='0775') #mode='0664'
+        dir.create(paste(.TRTH$csv_dir, date.format, "/", sep=""), showWarnings=FALSE, recursive=TRUE, mode='0775') #mode='0664'
 
         ## Move files to appropriate place
         #system(paste("mv -vf ", path.output,"Archives/",name.csv, " ", path.output,date.format,"/",date.format,".",name.new,".csv", sep=""))
-        system(paste("mv -f ", name.csv, " ", csv_dir, date.format, "/", date.format, ".", name.new, ".csv", sep=""))
+        system(paste("mv -f ", name.csv, " ", .TRTH$csv_dir, date.format, "/", date.format, ".", name.new, ".csv", sep=""))
 
         print(paste(date.format, name.new, "moved", sep=" "))
         files.xts <- rbind(files.xts,as.data.frame(cbind(name.new,date.format),stringsAsFactors=FALSE))
@@ -401,7 +404,8 @@
     
     missing_i <- NULL
     instr_s <- unique(files.xts[,'name.new'])
-    print(paste('Defining', length(instr_s[!instr_s %in% ls_instruments()]), 'missing instruments'))
+    alldefined <- c(ls_instruments(), ls_instruments_by('identifiers', NULL))
+    print(paste('Defining', length(instr_s[!instr_s %in% alldefined]), 'missing instruments'))
     missing_list <- list() # list to hold auto-defined missing instruments
     for(i in 1:length(instr_s)){
         instr <- getInstrument(instr_s[i], silent=TRUE)
@@ -410,12 +414,12 @@
             files.xts[files.xts$name.new ==instr_s[i],]$type <- paste(instr$type, collapse=";")
         } else {
             #print(paste(instr_s[i], 'does not appear to be an instrument, setting it to', default_type))
-            iauto <- instrument.auto(instr_s[i], currency=default_currency, 
-                                    default_type=default_type, assign_i=FALSE)
+            iauto <- instrument.auto(instr_s[i], currency=.TRTH$default_currency, 
+                                    default_type=.TRTH$default_type, assign_i=FALSE)
             if (!is.instrument(iauto)) {
-                warning(paste("Could NOT create ", default_type, " from ", 
+                warning(paste("Could NOT create ", .TRTH$default_type, " from ", 
                             instr_s[i], ". Creating _unknown_ instrument instead.", sep=""))
-                iauto <- try(suppressWarnings(instrument.auto(instr_s[i], currency=default_currency,
+                iauto <- try(suppressWarnings(instrument.auto(instr_s[i], currency=.TRTH$default_currency,
                                             default_type="unknown", assign_i=FALSE)))
             }
             missing_list[[iauto$primary_id]] <- iauto
@@ -431,35 +435,39 @@
         assign(x$primary_id, x, pos=FinancialInstrument:::.instrument)
     })
 
-    saveInstruments(paste("missing_instr",  format(Sys.time(), "%Y.%m.%d_%H%M%S"), sep='_'), path.output) #If you load this with loadInstruments it will not clobber .instrument
-    # now that we've save only the newly defined instruments, we can load back our other instruments
-    loadInstruments(instrument_file)
+    saveInstruments(paste("missing_instr",  format(Sys.time(), "%Y.%m.%d_%H%M%S"), sep='_'), .TRTH$path.output)
+    # now that we've saved only the newly defined instruments, we can load back our other instruments
+    loadInstruments(.TRTH$instrument_file)
     if (!is.null(iauto)) {
         .TRTH$missing_i <- missing_i <- data.frame(symbol=missing_i,type=iauto$type[1]) #legacy
-        write.csv(missing_i,file=paste(path.output,'missing_instruments.CSV',sep='')) 
+        write.csv(missing_i,file=paste(.TRTH$path.output,'missing_instruments.CSV',sep='')) 
     }
     .TRTH$files.xts <- files.xts
     assign('.TRTH', .TRTH, pos=.GlobalEnv)
-    detach(.TRTH)
+    #detach(.TRTH)
     .TRTH
 }
 
 
 FEreut2xts <- function(.TRTH) {
     if (missing(.TRTH) && !exists(".TRTH")) stop("Run configureTRTH function first")
-    attach(.TRTH)
+    #attach(.TRTH)
     # Make sure csv_dir exists since it is where we read the data from
-    if (!file.exists(csv_dir)) stop("There is no directory", paste(csv_dir))
-    if (!exists('files.xts')) stop("Cannot find 'files.xts' -- Run splitCSV first")
+    if (!file.exists(.TRTH$csv_dir)) stop("There is no directory", paste(.TRTH$csv_dir))
+    if (is.null(.TRTH$files.xts)) stop("Cannot find 'files.xts' -- Run splitCSV first")
+
+    files.xts <- .TRTH$files.xts
+
     oldTZ <- Sys.getenv("TZ")
     Sys.setenv(TZ='GMT')
 
     write.tick <- TRUE #if the tickdata file already exists and overwrite==FALSE, this will be set to FALSE
     write.sec <- TRUE #if the secdata file already exists and overwrite==FALSE, this will be set to FALSE
 
-    nc <- nchar(path.output) # make sure path.output ends with a forward slash
-    if(substr(path.output, nc, nc) != "/") path.output <- paste(path.output, "/", sep="") 
 
+    nc <- nchar(.TRTH$path.output) # make sure path.output ends with a forward slash
+    if(substr(.TRTH$path.output, nc, nc) != "/") .TRTH$path.output <- paste(.TRTH$path.output, "/", sep="") 
+
     # Function that we'll use to save charts of the data
     makeImages <- function(Data, dir, RIC, date) {
         stopifnot(file.exists(paste(dir, RIC, sep="")))
@@ -489,18 +497,18 @@
         date=files.xts[ii, 2] 
         type=files.xts[ii, 3]        
 
-        file.name.xts <- paste(tick_dir, RIC, "/", date, ".", RIC, ".RData", sep="")
-        file.name.sec <- paste(sec_dir, RIC, "/", date, ".", RIC, ".RData", sep="")	
-        if(!isTRUE(overwrite)) {
+        file.name.xts <- paste(.TRTH$tick_dir, RIC, "/", date, ".", RIC, ".RData", sep="")
+        file.name.sec <- paste(.TRTH$sec_dir, RIC, "/", date, ".", RIC, ".RData", sep="")	
+        if(!isTRUE(.TRTH$overwrite)) {
             if (file.exists(file.name.xts)){
 	            cat(paste(file.name.xts, "already exists, not overwriting\n"))
                 write.tick <- FALSE
-                tick.image <- FALSE
+                .TRTH$tick.image <- FALSE
             }
             if (file.exists(file.name.sec)) {
             	cat(paste(file.name.sec, "already exists, not overwriting\n"))
                 write.sec <- FALSE
-                sec.image <- FALSE
+                .TRTH$sec.image <- FALSE
             }
         }
 
@@ -510,7 +518,7 @@
         # there is nothing to be done -- return NULL
         if (!any(c(write.tick, write.sec))) return(NULL) 
 
-        CSV.name <- paste(csv_dir, date, '/', date, '.', RIC, '.csv', sep="")
+        CSV.name <- paste(.TRTH$csv_dir, date, '/', date, '.', RIC, '.csv', sep="")
         if (!file.exists(CSV.name) && file.exists(paste(CSV.name, ".gz", sep=""))) {
             #only zipped file on disk. We'll have to unzip.
             system(paste("gzip -d -f ", CSV.name, ".gz", sep=""))
@@ -610,28 +618,29 @@
         if(dim(Data)[1]<=25){return(NULL)}
 
         if(write.tick) {
-            dir.create(paste(tick_dir, RIC, sep=""), showWarnings=FALSE, mode='0775')
+            dir.create(paste(.TRTH$tick_dir, RIC, sep=""), showWarnings=FALSE, mode='0775')
             assign(RIC, Data)  # Rename Data to RIC
             save(list=RIC, file=file.name.xts)
         }
 
         datarange <- range(index(Data),na.rm = TRUE)
         datarange.dif <- difftime(datarange[2],datarange[1],units="secs")
-        if(isTRUE(tick.image) && datarange.dif>3600) makeImages(Data, tick_dir, RIC, date)
+        if(isTRUE(.TRTH$tick.image) && datarange.dif>3600) makeImages(Data, .TRTH$tick_dir, RIC, date)
 
         # Convert to 1 second data and save
         if (write.sec) {
-            dir.create(paste(sec_dir, RIC, "/", sep=""), showWarnings=FALSE, mode='0775')
+            dir.create(paste(.TRTH$sec_dir, RIC, "/", sep=""), showWarnings=FALSE, mode='0775')
             secData <- to_secBATV(Data)
             if (length(secData) == 0) return(NULL)
             assign(RIC, secData)
             save(list=RIC, file=file.name.sec)
         }
-        if (isTRUE(sec.image) && datarange.dif > 3600) makeImages(Data, sec_dir, RIC, date)
+        if (isTRUE(.TRTH$sec.image) && datarange.dif > 3600) makeImages(Data, .TRTH$sec_dir, RIC, date)
     } # End foreach loop
     #rm(list = 'RIC')
     Sys.setenv(TZ=oldTZ)
-    save(.TRTH, file=paste(path.output, 'config.env.RData', sep=""))
+    save(.TRTH, file=paste(.TRTH$path.output, 'config.env.RData', sep=""))
+    assign('.TRTH', .TRTH, pos=.GlobalEnv)
     Out
 }  ## End fn reut2xts