[Qca-commits] r34 - / DDIwR DDIwR/pkg DDIwR/pkg/R DDIwR/pkg/inst DDIwR/pkg/man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mon Aug 11 12:32:21 CEST 2014


Author: dusadrian
Date: 2014-08-11 12:32:21 +0200 (Mon, 11 Aug 2014)
New Revision: 34

Added:
   DDIwR/
   DDIwR/pkg/
   DDIwR/pkg/DESCRIPTION
   DDIwR/pkg/NAMESPACE
   DDIwR/pkg/R/
   DDIwR/pkg/R/getEnter.R
   DDIwR/pkg/R/getFiles.R
   DDIwR/pkg/R/getMetadata.R
   DDIwR/pkg/R/setupfile.R
   DDIwR/pkg/R/splitrows.R
   DDIwR/pkg/R/treatPath.R
   DDIwR/pkg/inst/
   DDIwR/pkg/inst/ChangeLog
   DDIwR/pkg/man/
   DDIwR/pkg/man/DDIwR-internal.Rd
   DDIwR/pkg/man/DDIwR.package.Rd
   DDIwR/pkg/man/getMetadata.Rd
   DDIwR/pkg/man/setupfile.Rd
   DDIwR/www/
Log:
First R-forge version of DDIwR

Added: DDIwR/pkg/DESCRIPTION
===================================================================
--- DDIwR/pkg/DESCRIPTION	                        (rev 0)
+++ DDIwR/pkg/DESCRIPTION	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,10 @@
+Package: DDIwR
+Version: 0.1-0
+Date: 2014-07-30
+Title: DDI with R
+Authors at R: c(person("Adrian", "Dusa", role = c("aut", "cre"),
+                      email = "dusa.adrian at unibuc.ro"))
+Depends: R (>= 3.0.0)
+Imports: XML, foreign
+Description: This package provides useful functions for various DDI (Data Documentation Initiative) related outputs.
+License: GPL (>= 2)

Added: DDIwR/pkg/NAMESPACE
===================================================================
--- DDIwR/pkg/NAMESPACE	                        (rev 0)
+++ DDIwR/pkg/NAMESPACE	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,8 @@
+export(setupfile,
+       splitrows,
+       treatPath,
+       getFiles,
+       getMetadata,
+       getEnter)
+import(XML)
+import(foreign)


Property changes on: DDIwR/pkg/NAMESPACE
___________________________________________________________________
Added: svn:executable
   + *

Added: DDIwR/pkg/R/getEnter.R
===================================================================
--- DDIwR/pkg/R/getEnter.R	                        (rev 0)
+++ DDIwR/pkg/R/getEnter.R	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,20 @@
+getEnter <- function(OS) {
+
+    detectedOS <- Sys.info()[['sysname']]
+    
+    if (OS == "Windows" | OS == "windows" | OS == "Win" | OS == "win") {
+        enter <- ifelse(detectedOS == "Windows", "\n", "\r\n")
+    }
+    else if (OS == "Linux" | OS == "linux") {
+        enter <- "\n"
+    }
+    else if (OS == "Darwin" | OS == "MacOS" | OS == "Apple" | OS == "Mac" | OS == "mac") {
+        enter <- ifelse(detectedOS == "Darwin", "\n", "\r")
+    }
+    else {
+        cat("\n")
+        stop("The specified OS is not supported.\n\n", call. = FALSE)
+    }
+    
+    return(enter)
+}

Added: DDIwR/pkg/R/getFiles.R
===================================================================
--- DDIwR/pkg/R/getFiles.R	                        (rev 0)
+++ DDIwR/pkg/R/getFiles.R	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,61 @@
+getFiles <- function(dirpath = ".", type="", currdir) {
+    
+    
+    # get all files
+    files <- list.files(dirpath)
+    
+    if (length(files) == 0) {
+        return(paste("The directory", dirpath, "is empty.\n\n"))
+    }
+    
+    # split all files by "." to separate names from extentions
+    filesplit <- strsplit(files, split="\\.")
+    
+    # get rid of files without extensions, and (sub)directory names
+    noext <- unlist(lapply(filesplit, length))
+    files <- files[noext > 1]
+    filesplit <- filesplit[noext > 1]
+    
+    if (length(filesplit) == 0) {
+        return(paste("The directory \"", dirpath, "\" doesn't contain any known files.\n\n", sep=""))
+    }
+    
+    
+    # get the file extensions
+    fileext <- unlist(lapply(filesplit, function(x) {
+        # we want the last part of the split
+        return(x[length(x)])
+    }))
+    
+    
+    if (type != "*") {
+        # check if there is any file with the right extension
+        fileidxs <- which(toupper(fileext) == toupper(type))
+        
+        if (length(fileidxs) == 0) {
+            return(paste("There is no .", type, " type file in the directory \"", dirpath, "\"\n\n", sep=""))
+        }
+        
+        # if code survives this far, filter all the "right" files from all files
+        files <- files[fileidxs]
+    }
+    
+    # split the files again, just in case some of them were not the right type
+    filesplit <- strsplit(files, split="\\.")
+    
+    # get the file names
+    # the code below is necessary just in case the filename contains a "."
+    # e.g. test.1.R
+    filenames <- unlist(lapply(filesplit, function(x) {
+        # we want all parts except the last, to restore the original filename
+        return(paste(x[-length(x)], collapse="."))
+    }))
+    
+    # get the file extensions again
+    fileext <- unlist(lapply(filesplit, function(x) {
+        return(x[length(x)])
+    }))
+    
+    return(list(files=files, filenames=filenames, fileext=fileext))
+    
+}

Added: DDIwR/pkg/R/getMetadata.R
===================================================================
--- DDIwR/pkg/R/getMetadata.R	                        (rev 0)
+++ DDIwR/pkg/R/getMetadata.R	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,144 @@
+require(XML)
+getMetadata <- function(xmlpath, OS = "windows", saveFile=FALSE, ...) {
+    
+    # TODO: detect DDI version or ask the version through a dedicated argument
+    
+    
+    other.args <- list(...)
+    enter <- getEnter(OS=OS)
+    
+    fromsetupfile <- FALSE
+    if ("fromsetupfile" %in% names(other.args)) {
+        fromsetupfile <- other.args$fromsetupfile
+    }
+    
+    tp <- treatPath(xmlpath, type="XML")
+    
+    ### !!! ###
+    # NEVER use getNodeSet() it's toooooo slooooow!!!
+    # use instead xmlElementsByTagName()
+    
+    currdir <- getwd()
+    if (saveFile) {
+        setwd(tp$completePath)
+    }
+    
+    singlefile <- length(tp$files) == 1
+    
+    if (!fromsetupfile) {
+        cat("Processing:\n")
+    }
+    
+    
+    for (ff in seq(length(tp$files))) {
+        if (!fromsetupfile) {
+            cat(tp$files[ff], "\n")
+        }
+        
+        if (saveFile) {
+            sink(paste(tp$filenames[ff], "R", sep="."))
+        }
+        
+        dd <- xmlTreeParse(tp$files[ff])$doc$children$codeBook
+        dd <- xmlElementsByTagName(dd, "dataDscr")[[1]]
+        dd <- xmlElementsByTagName(dd, "var")
+              
+        xmlVarNames <- as.vector(sapply(dd, xmlGetAttr, "name"))
+        # return(drop(xmlVarNames))
+        
+        metadata <- list()
+        metadata$varlab <- list()
+        metadata$vallab <- list()
+        
+        if (saveFile) {
+            cat("metadata <- list()", enter)
+            cat("metadata$varlab <- list()", enter)
+            cat("metadata$vallab <- list()", enter, enter)
+        }
+        
+        for (i in seq(length(dd))) {
+            
+            # metadata$varlab[[xmlVarNames[i]]] <- xmlValue(getNodeSet(dd[[i]], "//labl[@level='variable']")[[1]])
+            varlab <- xmlValue(xmlElementsByTagName(dd[[i]], "labl")[[1]])
+            varlab <- gsub("\"", "'", varlab)
+            varlab <- gsub("\\\\", "/", varlab)
+            metadata$varlab[[xmlVarNames[i]]] <- varlab
+            
+            if (saveFile) {
+                cat(paste("metadata$varlab$", xmlVarNames[i], " <- \"", varlab, "\"", enter, sep=""))
+            }
+            
+            #vallabs <- unlist(lapply(getNodeSet(dd[[i]], "//labl[@level='category']"), xmlValue))
+            vallabs <- xmlElementsByTagName(dd[[i]], "catgry")
+            
+            if (length(vallabs) > 0) {
+                
+                # metadata$vallab[[xmlVarNames[i]]] <- unlist(lapply(getNodeSet(dd[[i]], "//catValu"), xmlValue))
+                values <- as.vector(unlist(lapply(lapply(vallabs, xmlElementsByTagName, "catValu"), function(x) {
+                    return(xmlValue(x[[1]][[1]]))
+                })))
+                values <- gsub("\"", "'", values)
+                values <- gsub("\\\\", "/", values)
+                
+                labl <- as.vector(lapply(vallabs, xmlElementsByTagName, "labl"))
+                havelbls <- unlist(lapply(labl, function(x) length(x) > 0))
+                
+                values <- values[havelbls]
+                labl <- labl[havelbls]
+                
+                if (length(values) > 0) {
+                    metadata$vallab[[xmlVarNames[i]]] <- values
+                    testNum <- tryCatch(as.numeric(values),
+                                        warning = function(x) {
+                                                     return("...string...!!!")
+                                        })
+                    
+                    if (all(testNum != "...string...!!!")) {
+                        metadata$vallab[[xmlVarNames[i]]] <- testNum
+                        
+                        if (saveFile) {
+                            cat(paste("metadata$vallab$", xmlVarNames[i], " <- c(", 
+                                paste(testNum, collapse=", "), ")", enter, sep=""))
+                        }
+                        
+                        justlbls <- as.vector(unlist(lapply(labl, function(x) {
+                            return(xmlValue(x[[1]][[1]]))
+                        })))
+                        
+                        justlbls <- gsub("\"", "'", justlbls)
+                        justlbls <- gsub("\\\\", "/", justlbls)
+                        
+                        names(metadata$vallab[[xmlVarNames[i]]]) <- justlbls
+                        
+                        if (saveFile) {
+                            cat(paste("names(metadata$vallab$", xmlVarNames[i], ") <- c(\"",
+                                    paste(justlbls, collapse="\", \""), "\")", enter, sep=""))
+                        }
+                    }
+                    else {
+                        
+                        justlbls <- as.vector(unlist(lapply(lapply(vallabs, xmlElementsByTagName, "catValu"), function(x) {
+                            return(xmlValue(x[[1]][[1]]))
+                        })))
+                        justlbls <- gsub("\"", "'", justlbls)
+                        justlbls <- gsub("\\\\", "/", justlbls)
+                        
+                        if (saveFile) {
+                            cat(paste("metadata$vallab$", xmlVarNames[i], " <- c(\"",
+                                    paste(justlbls, collapse="\", \""), "\")", enter, sep=""))
+                        }
+                    }
+                }
+            }
+            cat(enter)
+        }
+    
+        sink()
+    }
+    
+    setwd(currdir)
+    if (singlefile) {
+        return(invisible(metadata))
+    }
+}
+

Added: DDIwR/pkg/R/setupfile.R
===================================================================
--- DDIwR/pkg/R/setupfile.R	                        (rev 0)
+++ DDIwR/pkg/R/setupfile.R	2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,1282 @@
+setupfile <- function(lbls = "", type="all", csv = "", miss, trymiss = FALSE, uniqueid = "",
+                      SD = "", delimiter = ",", OS = "windows", outfile="", ...) {
+    
+    # change the intrnlbls argument into a very unique name, just in case the
+    # list object in the external R file(s) are named "intrnlbls" as well
+    
+    intrnlbls <- lbls
+    intrnlbls_objname <- deparse(substitute(lbls))
+    rm(lbls)
+    
+    other.args <- list(...)
+    
+    pathIsFolder <- FALSE
+    if ("pathIsFolder" %in% names(other.args)) {
+        pathIsFolder <- other.args$pathIsFolder
+    }
+    
+    saveFile <- FALSE
+    if ("saveFile" %in% names(other.args)) {
+        saveFile <- other.args$saveFile
+    }
+    
+    
+    if (OS == "") {
+        OS <- Sys.info()[['sysname']]
+    }
+    
+    if (all(is.character(intrnlbls))) { # all() just in case someone provides a vector by mistake
+        
+        if (length(intrnlbls) > 1) {
+            cat("\n")
+            stop("The lbls argument should contain a single path to the list object.\n\n", call. = FALSE)
+        }
+        
+        xmlfiles <- FALSE
+        
+        labelist <- treatPath(intrnlbls, type = "R")
+        
+        if (length(labelist) == 1) {
+            labelist <- treatPath(intrnlbls, type = "XML")
+            if (length(labelist) == 1) {
+                cat("\n")
+                stop(gsub("XML", "R or .XML", labelist), call. = FALSE)
+            }
+            else {
+                xmlfiles <- TRUE
+            }
+        }
+        
+        if (!file.exists("Setup files")) {
+            dir.create("Setup files")
+        }
+        
+        csvdatadir <- FALSE # by default
+        
+        
+        # now trying to assess what the csv argument is
+        # it can be an object containing csv data, or
+        # it can be a string containing a path to the data
+        
+        
+        
+        if (all(is.character(csv))) {
+            if (csv != "") {
+                if (length(csv) > 1) {
+                    cat("\n")
+                    stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
+                }
+                
+                csvlist <- treatPath(csv, type = "csv")
+                if (length(csvlist) > 1) {
+                    datadir <- csvlist$completePath
+                    csvdatadir <- TRUE
+                }
+                else {
+                    cat("\nNOTE:", csvlist)
+                }
+            }
+            else {
+                # it's important to differentiate between "data" and "Data", for OSs that are case sensitive
+                csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "data"))
+                datathere <- csvdatadir
+                csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "Data"))
+                
+                if (csvdatadir) {
+                    csvlist <- treatPath(datadir, type = "csv")
+                    if (length(csvlist) == 1) {
+                        csvdatadir <- FALSE
+                        cat(paste("\nNOTE: There is a ", ifelse(datathere, "data", "Data"), " directory within ",
+                            labelist$completePath, ". "), csvlist)
+                    }
+                }
+            }
+        }
+        
+        
+        if (csvdatadir) {
+            csvfiles <- csvlist$files
+            csvnames <- csvlist$filenames
+            csvext <- csvlist$fileext
+            
+            cat ("Processing (including data directory):\n")
+        }
+        else {
+            
+            if (is.data.frame(csv)) {
+                if (length(labelist$files) > 1) {
+                    cat("\n")
+                    stop("There are multiple files containing labels and only one csv file provided.\n\n", call. = FALSE)
+                }
+            }
+            
+            cat("Processing (no data directory):\n")
+        }
+        
+        
+        for (i in seq(length(labelist$files))) {
+            
+            if (xmlfiles) {
+                intrnlblsObject <- getMetadata(file.path(labelist$completePath, labelist$files[i]), fromsetupfile = TRUE, saveFile = saveFile)
+            }
+            else {
+                aa <- ls()
+                
+                tryCatch(eval(parse(file.path(labelist$completePath, labelist$files[i]))), error = function(x) {
+                    stop(paste("\nThere is an error associated with the file \"", labelist$files[i], "\", see below:\n       ", gsub("Error in ", "", as.character(x)), sep=""), call. = FALSE)
+                })
+                
+                bb <- ls()
+                bb <- bb[-which(bb == "aa")]
+            }
+            
+            if (csvdatadir) {
+                
+                if (labelist$filenames[i] %in% csvnames) {
+                    cat(labelist$filenames[i], "\n")
+                    position <- match(labelist$filenames[i], csvnames)
+                    
+                    
+                    for (j in seq(length(position))) {
+                        
+                        if (csvext[position[j]] == "CSV") {
+                            csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), as.is=TRUE)
+                            
+                            
+                            # if the delimiter is not a comma, there will be only one big column
+                            if (ncol(csvreadfile) == 1) { # try ";" separated
+                                delimiter <- ";"
+                                csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep=";", as.is=TRUE)
+                            }
+                            
+                            # if still the delimiter is not the right one
+                            if (ncol(csvreadfile) == 1) { # try tab separated
+                                delimiter <- "\t"
+                                csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep="\t", as.is=TRUE)
+                            }
+                            
+                            # finally, if it's still not the right delimiter stop and print an error message
+                            if (ncol(csvreadfile) == 1) {
+                                cat("\n")
+                                stop(paste("Unknown column separator for the file", csvfiles[position[j]],
+                                           "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
+                            }
+                            
+                            
+                            if (!xmlfiles) {
+                                intrnlblsObject <- get(setdiff(bb, aa))
+                            }
+                            tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csvreadfile, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+                                            delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+                                error = function(x) {
+                                    cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
+                                    cat(as.character(x))
+                                })
+                        }
+                    }
+                }
+                else {
+                    cat(labelist$filenames[i], "(no .csv file)", "\n")
+                    if (!xmlfiles) {
+                        intrnlblsObject <- get(setdiff(bb, aa))
+                    }
+                    tryCatch(Recall(intrnlblsObject, type = type, miss = miss, trymiss = trymiss, uniqueid = uniqueid, SD = SD, 
+                                    delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+                        error = function(x) {
+                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
+                            cat(as.character(x))
+                        })
+                }
+            }
+            else {
+                cat(labelist$filenames[i], "\n")
+                
+                if (is.data.frame(csv)) {
+                    if (length(labelist$filenames) == 1) {
+                        if (!xmlfiles) {
+                            intrnlblsObject <- get(setdiff(bb, aa))
+                        }
+                        tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csv, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+                                        delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+                        error = function(x) {
+                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
+                            cat(as.character(x))
+                        })
+                    }
+                }
+                else {
+                    # there is really no csv data
+                        if (!xmlfiles) {
+                            intrnlblsObject <- get(setdiff(bb, aa))
+                        }
+                        tryCatch(Recall(intrnlblsObject, type = type, miss = miss, trymiss = trymiss, uniqueid = uniqueid, SD = SD, 
+                                        delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+                        error = function(x) {
+                            cat(paste("     There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n     ", sep=""))
+                            cat(as.character(x))
+                        })
+                }
+            }
+            
+            if (!xmlfiles) {
+                rm(list = c(eval(setdiff(bb, aa)), "bb", "aa"))
+            }
+        }
+        
+        cat("\nSetup files created in:\n", file.path(getwd(), "Setup files"), "\n\n", sep="")
+        
+        return(invisible())
+    }
+    
+    csvlist <- NULL # initialization
+    if (all(is.character(csv))) {
+        if (all(csv != "")) {
+            if (length(csv) > 1) {
+                cat("\n")
+                stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
+            }
+            
+            csvlist <- treatPath(csv, type = "CSV")
+            if (length(csvlist) > 1) {
+                # no error
+                if (length(csvlist$files) > 1) {
+                    cat("\n")
+                    stop("There is only one object containing labels and multiple csv files.\n\n", call. = FALSE)
+                }
+            }
+            else {
+                # There is a single string returned by treatPath(), with an error message
+                cat("\nNOTE:", csvlist)
+                csv <- "" # back to the default value
+            }
+        }
+    }
+    
+    
+    
+    if (is.null(names(intrnlbls)) | !all(names(intrnlbls) %in% c("varlab", "vallab"))) {
+        cat("\n")
+        stop("The object does not contain labels for variables and/or values.\n\n", call. = FALSE)
+    }
+    
+    if (!(type %in% c("SPSS", "Stata", "SAS", "R", "all"))) {
+        cat("\n")
+        stop("The argument <type> can only be one of: \"SPSS\", \"Stata\", \"SAS\", \"R\", or \"all\".\n\n", call. = FALSE)
+    }
+    
+    enter <- getEnter(OS=OS)
+    
+    varnames <- names(intrnlbls$varlab)
+    maxchars <- max(nchar(varnames))
+    varcheck <- rep(0, length(varnames))
+    formats <- FALSE
+    
+    csv_is_df <- is.data.frame(csv)
+    csv_is_path <- FALSE
+    if (length(csv) == 1) { # csv is a character vector of length 1, i.e. a path 
+        if (is.character(csv)) {
+            if (csv != "") {
+                csv_is_path <- TRUE
+            }
+        }
+    }
+    
+    if (csv_is_df | csv_is_path) {
+        
+        if (!is.null(csvlist)) {
+            csvreadfile <- read.csv(file.path(csvlist$completePath, csvlist$files[1]), as.is=TRUE)
+            
+            # if the delimiter is not a comma, there will be only one big column
+            if (ncol(csvreadfile) == 1) { # try ";" separated
+                delimiter <- ";"
+                csvreadfile <- read.csv(file.path(csvlist$completePath, csvfiles[position[j]]), sep=";", as.is=TRUE)
+            }
+            
+            # if still the delimiter is not the right one
+            if (ncol(csvreadfile) == 1) { # try tab separated
+                delimiter <- "\t"
+                csvreadfile <- read.csv(file.path(csvlist$completePath, csvfiles[position[j]]), sep="\t", as.is=TRUE)
+            }
+            
+            # finally, if it's still not the right delimiter stop and print an error message
+            if (ncol(csvreadfile) == 1) {
+                cat("\n")
+                stop(paste("Unknown column separator for the file", csvfiles[position[j]],
+                           "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
+            }
+            
+            cat("\n")
+            cat("Found \"", csvlist$files[1], "\" in the directory \"", csvlist$completePath, "\". Using that as the .csv file.\n\n", sep="")
+            
+            csv <- csvreadfile
+        }
+        
+        csvnames <- names(csv)
+        csvformats <- sasformats <- rep("", length(csvnames))
+        if (!is.data.frame(csv)) {
+            cat("\n")
+            stop("The csv file should be a data frame.\n\n", call. = FALSE)
+        }
+        
+        gofurther <- TRUE
+        
+        plusnames <- setdiff(toupper(csvnames), toupper(names(intrnlbls$varlab)))
+        if (length(plusnames) > 0) {
+            if (length(plusnames) == length(csvnames)) {
+                cat("    None of the variables in the .csv file have metadata information.\n",
+                    "    (perhaps the .csv file doesn't have the variable names in the first row?)\n", sep="")
+                gofurther <- FALSE
+            }
+            else {
+                cat("    There is no metadata information for the following variables in the .csv file:\n")
+                plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
+                for (pnms in plusnames) {
+                    cat("       ", pnms, "\n")
+                }
+                cat("\n")
+            }
+        }
+        
+        
+        plusnames <- setdiff(toupper(names(intrnlbls$varlab)), toupper(csvnames))
+        if (length(plusnames) > 0) {
+            cat("    There is metadata information for the following variables, but *not* in the .csv file:\n")
+            plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
+            for (pnms in plusnames) {
+                cat("       ", pnms, "\n")
+            }
+            
+            if (gofurther) {
+                cat("       ", ifelse(length(plusnames) == 1, "This variable", "These variables"), "will be omitted.\n")
+            }
+            else {
+                cat("\n")
+            }
+        }
+        
+        nrowscsv <- nrow(csv)
+        
+        
+        
+        if (gofurther) {
+            
+            intrnlbls$varlab <- intrnlbls$varlab[which(toupper(names(intrnlbls$varlab)) %in% toupper(csvnames))]
+            intrnlbls$vallab <- intrnlbls$vallab[which(toupper(names(intrnlbls$vallab)) %in% toupper(csvnames))]
+            
+            varnames <- names(intrnlbls$varlab)
+            maxchars <- max(nchar(varnames))
+            varcheck <- rep(0, length(varnames))
+            
+            printNOTE <- FALSE
+            
+            for (i in seq(length(csvnames))) {
+                vartype <- "numeric"
+                decimals <- FALSE
+                
+                tempvar <- csv[, csvnames[i]]
+                
+                if (is.factor(tempvar)) {
+                    templevels <- levels(tempvar)
+                    tempvar <- as.character(tempvar)
+                    if ("." %in% templevels) {
+                        tempvar[tempvar == "."] <- NA
+                        printNOTE <- TRUE
+                    }
+                }
+                
+                nofchars <- nchar(as.character(tempvar))
+                nofchars[is.na(tempvar)] <- 0
+                maxvarchar <- max(nofchars)
+                
+                if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
+                    if (is.character(intrnlbls$vallab[[toupper(csvnames[i])]])) {
+                        vartype <- "string"
+                        gofurther <- FALSE
+                        maxvarchar <- max(maxvarchar, nchar(intrnlbls$vallab[[toupper(csvnames[i])]]))
+                    }
+                }
+                
+                if (gofurther) {
+                    if (all(is.na(csv[, csvnames[i]]))) { # completely empty variable
+                        maxvarchar <- 1
+                    }
+                    else {
+                        
+                        # if (length(tryCatch(cc <- as.numeric(as.character(tempvar)), warning = function(x) {return(0)})) == nrowscsv) { # numeric variable
+                        if (is.numeric(tempvar)) {
+                            if (max(tempvar, na.rm = TRUE) - floor(max(tempvar, na.rm = TRUE)) > 0) { # has decimals
+                                decimals <- TRUE
+                            }
+                            else {
+                                if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
+                                    if (is.numeric(intrnlbls$vallab[[toupper(csvnames[i])]])) {
+                                        maxvarchar <- max(maxvarchar, nchar(length(intrnlbls$vallab[[toupper(csvnames[i])]])))
+                                    }
+                                }
+                            }
+                        }
+                        else { # string variable
+                            vartype <- "string"
+                        }
+                    }
+                }
+                
+                if (vartype == "numeric") {
+                    if (decimals) {
+                        csvformats[i] <- paste("F", maxvarchar, ".2", sep="")
+                    }
+                    else {
+                        csvformats[i] <- paste("F", maxvarchar, ".0", sep="")
+                    }
+                }
+                else if (vartype == "string") {
+                    sasformats[i] <- "$"
+                    csvformats[i] <- paste("A", maxvarchar, sep="")
+                }
+                
+                varcheck[i] <- 1
+            }
+            
+            if (printNOTE) {
+                cat("    NOTE: some of the variables in this file have a \".\" sign to represent a missing.\n")
+                cat("    The import of the .csv file will not work for other softwares than SPSS.\n\n")
+            }
+            
+            formats <- all(csvformats != "")
+        }
+        
+        ## TO check if any of the existing metadata variables is not found in the CSV data file
+    }
+    
+    stringvars <- lapply(intrnlbls$vallab, function(x) {
+        all(is.character(x[[1]]))
+    })
+    
+    
+    if (missing(outfile)) {
+        if (grepl("\"", intrnlbls_objname)) {
+            outfile <- readline("Name for the setup file:\n")
+        }
+        else {
+            outfile <- intrnlbls_objname
+        }
+    }
+    
+    uniqueList <- lapply(unique(intrnlbls$vallab), function(uniques) {
+        vars <- sapply(names(intrnlbls$vallab),
+                     function(x) {
+                         ifelse(length(intrnlbls$vallab[[x]]) == length(uniques),
+                                all(names(intrnlbls$vallab[[x]]) == names(uniques)), FALSE)
+                     })
+        return(names(vars[vars]))
+    })
+    
+    
+    if (missing(miss)) {
+        if (trymiss) {
+            miss <- c("DK/NA", "DK/NO", "DK", "NA", "N/A", "N.A.", "Not answered",
+                      "Don't know", "(Don't know)", "No answer", "No opinion",
+                      "Not applicable", "Not relevant", "Refused", "(Refused)",
+                      "Refused / no answer", "(Refused / no answer)",
+                      "Can't say", "Don't know / Can't say")
+        }
+    }
+    
+    
+    if (type == "SPSS" | type == "all") {
+        intrnlbls2 <- intrnlbls
+        printMISSING <- FALSE
+        
+        if (!file.exists("Setup files")) {
+            dir.create("Setup files")
+        }
+        
+        if (!file.exists(file.path("Setup files", "SPSS"))) {
+            dir.create(file.path("Setup files", "SPSS"))
+        }
+        
+        
+        currentdir <- getwd()
+        setwd(file.path("Setup files", "SPSS"))
+        sink(ifelse(length(grep("\\.sps", outfile)) > 0, outfile, paste(outfile, ".sps", sep="")))
+        
+        cat("* ------------------------------------------------------------------------------", enter, enter,
+            "* --- CONFIGURATION SECTION - START ---", enter, enter, enter, sep="")
+
+        if (formats) {
+            cat("* The following command should contain the complete path and", enter,
+                "* name of the .csv file to be read (e.g. \"C:/CIS 2008/Data/ALL.csv\")", enter,
+                "* Change CSV_DATA_PATH to your filename, below:", enter, enter,
+                "FILE HANDLE csvpath /NAME=\"CSV_DATA_PATH\" .", enter, enter, enter, sep="")
+        }
+        
+        cat("* The following command should contain the complete path and", enter,
+            "* name of the .sav file to be saved (e.g. \"C:/CIS 2008/Data/ALL.sav\")", enter,
+            "* Change SAV_DATA_PATH to your filename, below:", enter, enter,
+            "FILE HANDLE savfile /NAME=\"SAV_DATA_PATH\" .", enter, enter, enter,
+            "* --- CONFIGURATION SECTION -  END  ---", enter, enter,
+            "* ------------------------------------------------------------------------------", enter, enter, enter, enter,
+            "* There should be nothing to change below this line", enter,                                                            
+            "* ------------------------------------------------------------------------------", enter, enter, enter, enter, sep="")
+                  
+        if (formats) {
+            cat("* -------------- Start Definition Macro --------------", enter, enter,
+                "SET LOCALE = 'English' .", enter,
+                "SHOW LOCALE .", enter, enter, # SET DECIMAL = COMMA . * (might be another idea)
+                "* --------------     Read Raw Data      --------------", enter, enter,
+                "GET DATA", enter,
+                " /TYPE=TXT", enter,
+                " /FILE=csvpath", enter,
+                " /DELCASE=LINE", enter,
+                " /DELIMITERS=\"", ifelse(delimiter == "\t", "\\t", delimiter), "\"", enter,
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/qca -r 34


More information about the Qca-commits mailing list