[Qca-commits] r34 - / DDIwR DDIwR/pkg DDIwR/pkg/R DDIwR/pkg/inst DDIwR/pkg/man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Mon Aug 11 12:32:21 CEST 2014
Author: dusadrian
Date: 2014-08-11 12:32:21 +0200 (Mon, 11 Aug 2014)
New Revision: 34
Added:
DDIwR/
DDIwR/pkg/
DDIwR/pkg/DESCRIPTION
DDIwR/pkg/NAMESPACE
DDIwR/pkg/R/
DDIwR/pkg/R/getEnter.R
DDIwR/pkg/R/getFiles.R
DDIwR/pkg/R/getMetadata.R
DDIwR/pkg/R/setupfile.R
DDIwR/pkg/R/splitrows.R
DDIwR/pkg/R/treatPath.R
DDIwR/pkg/inst/
DDIwR/pkg/inst/ChangeLog
DDIwR/pkg/man/
DDIwR/pkg/man/DDIwR-internal.Rd
DDIwR/pkg/man/DDIwR.package.Rd
DDIwR/pkg/man/getMetadata.Rd
DDIwR/pkg/man/setupfile.Rd
DDIwR/www/
Log:
First R-forge version of DDIwR
Added: DDIwR/pkg/DESCRIPTION
===================================================================
--- DDIwR/pkg/DESCRIPTION (rev 0)
+++ DDIwR/pkg/DESCRIPTION 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,10 @@
+Package: DDIwR
+Version: 0.1-0
+Date: 2014-07-30
+Title: DDI with R
+Authors at R: c(person("Adrian", "Dusa", role = c("aut", "cre"),
+ email = "dusa.adrian at unibuc.ro"))
+Depends: R (>= 3.0.0)
+Imports: XML, foreign
+Description: This package provides useful functions for various DDI (Data Documentation Initiative) related outputs.
+License: GPL (>= 2)
Added: DDIwR/pkg/NAMESPACE
===================================================================
--- DDIwR/pkg/NAMESPACE (rev 0)
+++ DDIwR/pkg/NAMESPACE 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,8 @@
+export(setupfile,
+ splitrows,
+ treatPath,
+ getFiles,
+ getMetadata,
+ getEnter)
+import(XML)
+import(foreign)
Property changes on: DDIwR/pkg/NAMESPACE
___________________________________________________________________
Added: svn:executable
+ *
Added: DDIwR/pkg/R/getEnter.R
===================================================================
--- DDIwR/pkg/R/getEnter.R (rev 0)
+++ DDIwR/pkg/R/getEnter.R 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,20 @@
+getEnter <- function(OS) {
+
+ detectedOS <- Sys.info()[['sysname']]
+
+ if (OS == "Windows" | OS == "windows" | OS == "Win" | OS == "win") {
+ enter <- ifelse(detectedOS == "Windows", "\n", "\r\n")
+ }
+ else if (OS == "Linux" | OS == "linux") {
+ enter <- "\n"
+ }
+ else if (OS == "Darwin" | OS == "MacOS" | OS == "Apple" | OS == "Mac" | OS == "mac") {
+ enter <- ifelse(detectedOS == "Darwin", "\n", "\r")
+ }
+ else {
+ cat("\n")
+ stop("The specified OS is not supported.\n\n", call. = FALSE)
+ }
+
+ return(enter)
+}
Added: DDIwR/pkg/R/getFiles.R
===================================================================
--- DDIwR/pkg/R/getFiles.R (rev 0)
+++ DDIwR/pkg/R/getFiles.R 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,61 @@
+getFiles <- function(dirpath = ".", type="", currdir) {
+
+
+ # get all files
+ files <- list.files(dirpath)
+
+ if (length(files) == 0) {
+ return(paste("The directory", dirpath, "is empty.\n\n"))
+ }
+
+ # split all files by "." to separate names from extentions
+ filesplit <- strsplit(files, split="\\.")
+
+ # get rid of files without extensions, and (sub)directory names
+ noext <- unlist(lapply(filesplit, length))
+ files <- files[noext > 1]
+ filesplit <- filesplit[noext > 1]
+
+ if (length(filesplit) == 0) {
+ return(paste("The directory \"", dirpath, "\" doesn't contain any known files.\n\n", sep=""))
+ }
+
+
+ # get the file extensions
+ fileext <- unlist(lapply(filesplit, function(x) {
+ # we want the last part of the split
+ return(x[length(x)])
+ }))
+
+
+ if (type != "*") {
+ # check if there is any file with the right extension
+ fileidxs <- which(toupper(fileext) == toupper(type))
+
+ if (length(fileidxs) == 0) {
+ return(paste("There is no .", type, " type file in the directory \"", dirpath, "\"\n\n", sep=""))
+ }
+
+ # if code survives this far, filter all the "right" files from all files
+ files <- files[fileidxs]
+ }
+
+ # split the files again, just in case some of them were not the right type
+ filesplit <- strsplit(files, split="\\.")
+
+ # get the file names
+ # the code below is necessary just in case the filename contains a "."
+ # e.g. test.1.R
+ filenames <- unlist(lapply(filesplit, function(x) {
+ # we want all parts except the last, to restore the original filename
+ return(paste(x[-length(x)], collapse="."))
+ }))
+
+ # get the file extensions again
+ fileext <- unlist(lapply(filesplit, function(x) {
+ return(x[length(x)])
+ }))
+
+ return(list(files=files, filenames=filenames, fileext=fileext))
+
+}
Added: DDIwR/pkg/R/getMetadata.R
===================================================================
--- DDIwR/pkg/R/getMetadata.R (rev 0)
+++ DDIwR/pkg/R/getMetadata.R 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,144 @@
+require(XML)
+getMetadata <- function(xmlpath, OS = "windows", saveFile=FALSE, ...) {
+
+ # TODO: detect DDI version or ask the version through a dedicated argument
+
+
+ other.args <- list(...)
+ enter <- getEnter(OS=OS)
+
+ fromsetupfile <- FALSE
+ if ("fromsetupfile" %in% names(other.args)) {
+ fromsetupfile <- other.args$fromsetupfile
+ }
+
+ tp <- treatPath(xmlpath, type="XML")
+
+ ### !!! ###
+ # NEVER use getNodeSet() it's toooooo slooooow!!!
+ # use instead xmlElementsByTagName()
+
+ currdir <- getwd()
+ if (saveFile) {
+ setwd(tp$completePath)
+ }
+
+ singlefile <- length(tp$files) == 1
+
+ if (!fromsetupfile) {
+ cat("Processing:\n")
+ }
+
+
+ for (ff in seq(length(tp$files))) {
+ if (!fromsetupfile) {
+ cat(tp$files[ff], "\n")
+ }
+
+ if (saveFile) {
+ sink(paste(tp$filenames[ff], "R", sep="."))
+ }
+
+ dd <- xmlTreeParse(tp$files[ff])$doc$children$codeBook
+ dd <- xmlElementsByTagName(dd, "dataDscr")[[1]]
+ dd <- xmlElementsByTagName(dd, "var")
+
+ xmlVarNames <- as.vector(sapply(dd, xmlGetAttr, "name"))
+ # return(drop(xmlVarNames))
+
+ metadata <- list()
+ metadata$varlab <- list()
+ metadata$vallab <- list()
+
+ if (saveFile) {
+ cat("metadata <- list()", enter)
+ cat("metadata$varlab <- list()", enter)
+ cat("metadata$vallab <- list()", enter, enter)
+ }
+
+ for (i in seq(length(dd))) {
+
+ # metadata$varlab[[xmlVarNames[i]]] <- xmlValue(getNodeSet(dd[[i]], "//labl[@level='variable']")[[1]])
+ varlab <- xmlValue(xmlElementsByTagName(dd[[i]], "labl")[[1]])
+ varlab <- gsub("\"", "'", varlab)
+ varlab <- gsub("\\\\", "/", varlab)
+ metadata$varlab[[xmlVarNames[i]]] <- varlab
+
+ if (saveFile) {
+ cat(paste("metadata$varlab$", xmlVarNames[i], " <- \"", varlab, "\"", enter, sep=""))
+ }
+
+ #vallabs <- unlist(lapply(getNodeSet(dd[[i]], "//labl[@level='category']"), xmlValue))
+ vallabs <- xmlElementsByTagName(dd[[i]], "catgry")
+
+ if (length(vallabs) > 0) {
+
+ # metadata$vallab[[xmlVarNames[i]]] <- unlist(lapply(getNodeSet(dd[[i]], "//catValu"), xmlValue))
+ values <- as.vector(unlist(lapply(lapply(vallabs, xmlElementsByTagName, "catValu"), function(x) {
+ return(xmlValue(x[[1]][[1]]))
+ })))
+ values <- gsub("\"", "'", values)
+ values <- gsub("\\\\", "/", values)
+
+ labl <- as.vector(lapply(vallabs, xmlElementsByTagName, "labl"))
+ havelbls <- unlist(lapply(labl, function(x) length(x) > 0))
+
+ values <- values[havelbls]
+ labl <- labl[havelbls]
+
+ if (length(values) > 0) {
+ metadata$vallab[[xmlVarNames[i]]] <- values
+ testNum <- tryCatch(as.numeric(values),
+ warning = function(x) {
+ return("...string...!!!")
+ })
+
+ if (all(testNum != "...string...!!!")) {
+ metadata$vallab[[xmlVarNames[i]]] <- testNum
+
+ if (saveFile) {
+ cat(paste("metadata$vallab$", xmlVarNames[i], " <- c(",
+ paste(testNum, collapse=", "), ")", enter, sep=""))
+ }
+
+ justlbls <- as.vector(unlist(lapply(labl, function(x) {
+ return(xmlValue(x[[1]][[1]]))
+ })))
+
+ justlbls <- gsub("\"", "'", justlbls)
+ justlbls <- gsub("\\\\", "/", justlbls)
+
+ names(metadata$vallab[[xmlVarNames[i]]]) <- justlbls
+
+ if (saveFile) {
+ cat(paste("names(metadata$vallab$", xmlVarNames[i], ") <- c(\"",
+ paste(justlbls, collapse="\", \""), "\")", enter, sep=""))
+ }
+ }
+ else {
+
+ justlbls <- as.vector(unlist(lapply(lapply(vallabs, xmlElementsByTagName, "catValu"), function(x) {
+ return(xmlValue(x[[1]][[1]]))
+ })))
+ justlbls <- gsub("\"", "'", justlbls)
+ justlbls <- gsub("\\\\", "/", justlbls)
+
+ if (saveFile) {
+ cat(paste("metadata$vallab$", xmlVarNames[i], " <- c(\"",
+ paste(justlbls, collapse="\", \""), "\")", enter, sep=""))
+ }
+ }
+ }
+ }
+ cat(enter)
+ }
+
+ sink()
+ }
+
+ setwd(currdir)
+ if (singlefile) {
+ return(invisible(metadata))
+ }
+}
+
Added: DDIwR/pkg/R/setupfile.R
===================================================================
--- DDIwR/pkg/R/setupfile.R (rev 0)
+++ DDIwR/pkg/R/setupfile.R 2014-08-11 10:32:21 UTC (rev 34)
@@ -0,0 +1,1282 @@
+setupfile <- function(lbls = "", type="all", csv = "", miss, trymiss = FALSE, uniqueid = "",
+ SD = "", delimiter = ",", OS = "windows", outfile="", ...) {
+
+ # change the intrnlbls argument into a very unique name, just in case the
+ # list object in the external R file(s) are named "intrnlbls" as well
+
+ intrnlbls <- lbls
+ intrnlbls_objname <- deparse(substitute(lbls))
+ rm(lbls)
+
+ other.args <- list(...)
+
+ pathIsFolder <- FALSE
+ if ("pathIsFolder" %in% names(other.args)) {
+ pathIsFolder <- other.args$pathIsFolder
+ }
+
+ saveFile <- FALSE
+ if ("saveFile" %in% names(other.args)) {
+ saveFile <- other.args$saveFile
+ }
+
+
+ if (OS == "") {
+ OS <- Sys.info()[['sysname']]
+ }
+
+ if (all(is.character(intrnlbls))) { # all() just in case someone provides a vector by mistake
+
+ if (length(intrnlbls) > 1) {
+ cat("\n")
+ stop("The lbls argument should contain a single path to the list object.\n\n", call. = FALSE)
+ }
+
+ xmlfiles <- FALSE
+
+ labelist <- treatPath(intrnlbls, type = "R")
+
+ if (length(labelist) == 1) {
+ labelist <- treatPath(intrnlbls, type = "XML")
+ if (length(labelist) == 1) {
+ cat("\n")
+ stop(gsub("XML", "R or .XML", labelist), call. = FALSE)
+ }
+ else {
+ xmlfiles <- TRUE
+ }
+ }
+
+ if (!file.exists("Setup files")) {
+ dir.create("Setup files")
+ }
+
+ csvdatadir <- FALSE # by default
+
+
+ # now trying to assess what the csv argument is
+ # it can be an object containing csv data, or
+ # it can be a string containing a path to the data
+
+
+
+ if (all(is.character(csv))) {
+ if (csv != "") {
+ if (length(csv) > 1) {
+ cat("\n")
+ stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
+ }
+
+ csvlist <- treatPath(csv, type = "csv")
+ if (length(csvlist) > 1) {
+ datadir <- csvlist$completePath
+ csvdatadir <- TRUE
+ }
+ else {
+ cat("\nNOTE:", csvlist)
+ }
+ }
+ else {
+ # it's important to differentiate between "data" and "Data", for OSs that are case sensitive
+ csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "data"))
+ datathere <- csvdatadir
+ csvdatadir <- file.exists(datadir <- file.path(labelist$completePath, "Data"))
+
+ if (csvdatadir) {
+ csvlist <- treatPath(datadir, type = "csv")
+ if (length(csvlist) == 1) {
+ csvdatadir <- FALSE
+ cat(paste("\nNOTE: There is a ", ifelse(datathere, "data", "Data"), " directory within ",
+ labelist$completePath, ". "), csvlist)
+ }
+ }
+ }
+ }
+
+
+ if (csvdatadir) {
+ csvfiles <- csvlist$files
+ csvnames <- csvlist$filenames
+ csvext <- csvlist$fileext
+
+ cat ("Processing (including data directory):\n")
+ }
+ else {
+
+ if (is.data.frame(csv)) {
+ if (length(labelist$files) > 1) {
+ cat("\n")
+ stop("There are multiple files containing labels and only one csv file provided.\n\n", call. = FALSE)
+ }
+ }
+
+ cat("Processing (no data directory):\n")
+ }
+
+
+ for (i in seq(length(labelist$files))) {
+
+ if (xmlfiles) {
+ intrnlblsObject <- getMetadata(file.path(labelist$completePath, labelist$files[i]), fromsetupfile = TRUE, saveFile = saveFile)
+ }
+ else {
+ aa <- ls()
+
+ tryCatch(eval(parse(file.path(labelist$completePath, labelist$files[i]))), error = function(x) {
+ stop(paste("\nThere is an error associated with the file \"", labelist$files[i], "\", see below:\n ", gsub("Error in ", "", as.character(x)), sep=""), call. = FALSE)
+ })
+
+ bb <- ls()
+ bb <- bb[-which(bb == "aa")]
+ }
+
+ if (csvdatadir) {
+
+ if (labelist$filenames[i] %in% csvnames) {
+ cat(labelist$filenames[i], "\n")
+ position <- match(labelist$filenames[i], csvnames)
+
+
+ for (j in seq(length(position))) {
+
+ if (csvext[position[j]] == "CSV") {
+ csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), as.is=TRUE)
+
+
+ # if the delimiter is not a comma, there will be only one big column
+ if (ncol(csvreadfile) == 1) { # try ";" separated
+ delimiter <- ";"
+ csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep=";", as.is=TRUE)
+ }
+
+ # if still the delimiter is not the right one
+ if (ncol(csvreadfile) == 1) { # try tab separated
+ delimiter <- "\t"
+ csvreadfile <- read.csv(file.path(datadir, csvfiles[position[j]]), sep="\t", as.is=TRUE)
+ }
+
+ # finally, if it's still not the right delimiter stop and print an error message
+ if (ncol(csvreadfile) == 1) {
+ cat("\n")
+ stop(paste("Unknown column separator for the file", csvfiles[position[j]],
+ "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
+ }
+
+
+ if (!xmlfiles) {
+ intrnlblsObject <- get(setdiff(bb, aa))
+ }
+ tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csvreadfile, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+ delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+ error = function(x) {
+ cat(paste(" There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n ", sep=""))
+ cat(as.character(x))
+ })
+ }
+ }
+ }
+ else {
+ cat(labelist$filenames[i], "(no .csv file)", "\n")
+ if (!xmlfiles) {
+ intrnlblsObject <- get(setdiff(bb, aa))
+ }
+ tryCatch(Recall(intrnlblsObject, type = type, miss = miss, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+ delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+ error = function(x) {
+ cat(paste(" There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n ", sep=""))
+ cat(as.character(x))
+ })
+ }
+ }
+ else {
+ cat(labelist$filenames[i], "\n")
+
+ if (is.data.frame(csv)) {
+ if (length(labelist$filenames) == 1) {
+ if (!xmlfiles) {
+ intrnlblsObject <- get(setdiff(bb, aa))
+ }
+ tryCatch(Recall(intrnlblsObject, type = type, miss = miss, csv = csv, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+ delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+ error = function(x) {
+ cat(paste(" There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n ", sep=""))
+ cat(as.character(x))
+ })
+ }
+ }
+ else {
+ # there is really no csv data
+ if (!xmlfiles) {
+ intrnlblsObject <- get(setdiff(bb, aa))
+ }
+ tryCatch(Recall(intrnlblsObject, type = type, miss = miss, trymiss = trymiss, uniqueid = uniqueid, SD = SD,
+ delimiter = delimiter, OS = OS, outfile = labelist$filenames[i], pathIsFolder = pathIsFolder, ... = ...),
+ error = function(x) {
+ cat(paste(" There is an error associated with the file \"", labelist$filenames[i], "\", see below:\n ", sep=""))
+ cat(as.character(x))
+ })
+ }
+ }
+
+ if (!xmlfiles) {
+ rm(list = c(eval(setdiff(bb, aa)), "bb", "aa"))
+ }
+ }
+
+ cat("\nSetup files created in:\n", file.path(getwd(), "Setup files"), "\n\n", sep="")
+
+ return(invisible())
+ }
+
+ csvlist <- NULL # initialization
+ if (all(is.character(csv))) {
+ if (all(csv != "")) {
+ if (length(csv) > 1) {
+ cat("\n")
+ stop("The csv argument should contain a single path to the list object.\n\n", call. = FALSE)
+ }
+
+ csvlist <- treatPath(csv, type = "CSV")
+ if (length(csvlist) > 1) {
+ # no error
+ if (length(csvlist$files) > 1) {
+ cat("\n")
+ stop("There is only one object containing labels and multiple csv files.\n\n", call. = FALSE)
+ }
+ }
+ else {
+ # There is a single string returned by treatPath(), with an error message
+ cat("\nNOTE:", csvlist)
+ csv <- "" # back to the default value
+ }
+ }
+ }
+
+
+
+ if (is.null(names(intrnlbls)) | !all(names(intrnlbls) %in% c("varlab", "vallab"))) {
+ cat("\n")
+ stop("The object does not contain labels for variables and/or values.\n\n", call. = FALSE)
+ }
+
+ if (!(type %in% c("SPSS", "Stata", "SAS", "R", "all"))) {
+ cat("\n")
+ stop("The argument <type> can only be one of: \"SPSS\", \"Stata\", \"SAS\", \"R\", or \"all\".\n\n", call. = FALSE)
+ }
+
+ enter <- getEnter(OS=OS)
+
+ varnames <- names(intrnlbls$varlab)
+ maxchars <- max(nchar(varnames))
+ varcheck <- rep(0, length(varnames))
+ formats <- FALSE
+
+ csv_is_df <- is.data.frame(csv)
+ csv_is_path <- FALSE
+ if (length(csv) == 1) { # csv is a character vector of length 1, i.e. a path
+ if (is.character(csv)) {
+ if (csv != "") {
+ csv_is_path <- TRUE
+ }
+ }
+ }
+
+ if (csv_is_df | csv_is_path) {
+
+ if (!is.null(csvlist)) {
+ csvreadfile <- read.csv(file.path(csvlist$completePath, csvlist$files[1]), as.is=TRUE)
+
+ # if the delimiter is not a comma, there will be only one big column
+ if (ncol(csvreadfile) == 1) { # try ";" separated
+ delimiter <- ";"
+ csvreadfile <- read.csv(file.path(csvlist$completePath, csvfiles[position[j]]), sep=";", as.is=TRUE)
+ }
+
+ # if still the delimiter is not the right one
+ if (ncol(csvreadfile) == 1) { # try tab separated
+ delimiter <- "\t"
+ csvreadfile <- read.csv(file.path(csvlist$completePath, csvfiles[position[j]]), sep="\t", as.is=TRUE)
+ }
+
+ # finally, if it's still not the right delimiter stop and print an error message
+ if (ncol(csvreadfile) == 1) {
+ cat("\n")
+ stop(paste("Unknown column separator for the file", csvfiles[position[j]],
+ "\nShould be either \",\" or \";\" or tab separated.\n\n"), call. = FALSE)
+ }
+
+ cat("\n")
+ cat("Found \"", csvlist$files[1], "\" in the directory \"", csvlist$completePath, "\". Using that as the .csv file.\n\n", sep="")
+
+ csv <- csvreadfile
+ }
+
+ csvnames <- names(csv)
+ csvformats <- sasformats <- rep("", length(csvnames))
+ if (!is.data.frame(csv)) {
+ cat("\n")
+ stop("The csv file should be a data frame.\n\n", call. = FALSE)
+ }
+
+ gofurther <- TRUE
+
+ plusnames <- setdiff(toupper(csvnames), toupper(names(intrnlbls$varlab)))
+ if (length(plusnames) > 0) {
+ if (length(plusnames) == length(csvnames)) {
+ cat(" None of the variables in the .csv file have metadata information.\n",
+ " (perhaps the .csv file doesn't have the variable names in the first row?)\n", sep="")
+ gofurther <- FALSE
+ }
+ else {
+ cat(" There is no metadata information for the following variables in the .csv file:\n")
+ plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
+ for (pnms in plusnames) {
+ cat(" ", pnms, "\n")
+ }
+ cat("\n")
+ }
+ }
+
+
+ plusnames <- setdiff(toupper(names(intrnlbls$varlab)), toupper(csvnames))
+ if (length(plusnames) > 0) {
+ cat(" There is metadata information for the following variables, but *not* in the .csv file:\n")
+ plusnames <- strwrap(paste(plusnames, collapse=", "), 75)
+ for (pnms in plusnames) {
+ cat(" ", pnms, "\n")
+ }
+
+ if (gofurther) {
+ cat(" ", ifelse(length(plusnames) == 1, "This variable", "These variables"), "will be omitted.\n")
+ }
+ else {
+ cat("\n")
+ }
+ }
+
+ nrowscsv <- nrow(csv)
+
+
+
+ if (gofurther) {
+
+ intrnlbls$varlab <- intrnlbls$varlab[which(toupper(names(intrnlbls$varlab)) %in% toupper(csvnames))]
+ intrnlbls$vallab <- intrnlbls$vallab[which(toupper(names(intrnlbls$vallab)) %in% toupper(csvnames))]
+
+ varnames <- names(intrnlbls$varlab)
+ maxchars <- max(nchar(varnames))
+ varcheck <- rep(0, length(varnames))
+
+ printNOTE <- FALSE
+
+ for (i in seq(length(csvnames))) {
+ vartype <- "numeric"
+ decimals <- FALSE
+
+ tempvar <- csv[, csvnames[i]]
+
+ if (is.factor(tempvar)) {
+ templevels <- levels(tempvar)
+ tempvar <- as.character(tempvar)
+ if ("." %in% templevels) {
+ tempvar[tempvar == "."] <- NA
+ printNOTE <- TRUE
+ }
+ }
+
+ nofchars <- nchar(as.character(tempvar))
+ nofchars[is.na(tempvar)] <- 0
+ maxvarchar <- max(nofchars)
+
+ if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
+ if (is.character(intrnlbls$vallab[[toupper(csvnames[i])]])) {
+ vartype <- "string"
+ gofurther <- FALSE
+ maxvarchar <- max(maxvarchar, nchar(intrnlbls$vallab[[toupper(csvnames[i])]]))
+ }
+ }
+
+ if (gofurther) {
+ if (all(is.na(csv[, csvnames[i]]))) { # completely empty variable
+ maxvarchar <- 1
+ }
+ else {
+
+ # if (length(tryCatch(cc <- as.numeric(as.character(tempvar)), warning = function(x) {return(0)})) == nrowscsv) { # numeric variable
+ if (is.numeric(tempvar)) {
+ if (max(tempvar, na.rm = TRUE) - floor(max(tempvar, na.rm = TRUE)) > 0) { # has decimals
+ decimals <- TRUE
+ }
+ else {
+ if (toupper(csvnames[i]) %in% names(intrnlbls$vallab)) {
+ if (is.numeric(intrnlbls$vallab[[toupper(csvnames[i])]])) {
+ maxvarchar <- max(maxvarchar, nchar(length(intrnlbls$vallab[[toupper(csvnames[i])]])))
+ }
+ }
+ }
+ }
+ else { # string variable
+ vartype <- "string"
+ }
+ }
+ }
+
+ if (vartype == "numeric") {
+ if (decimals) {
+ csvformats[i] <- paste("F", maxvarchar, ".2", sep="")
+ }
+ else {
+ csvformats[i] <- paste("F", maxvarchar, ".0", sep="")
+ }
+ }
+ else if (vartype == "string") {
+ sasformats[i] <- "$"
+ csvformats[i] <- paste("A", maxvarchar, sep="")
+ }
+
+ varcheck[i] <- 1
+ }
+
+ if (printNOTE) {
+ cat(" NOTE: some of the variables in this file have a \".\" sign to represent a missing.\n")
+ cat(" The import of the .csv file will not work for other softwares than SPSS.\n\n")
+ }
+
+ formats <- all(csvformats != "")
+ }
+
+ ## TO check if any of the existing metadata variables is not found in the CSV data file
+ }
+
+ stringvars <- lapply(intrnlbls$vallab, function(x) {
+ all(is.character(x[[1]]))
+ })
+
+
+ if (missing(outfile)) {
+ if (grepl("\"", intrnlbls_objname)) {
+ outfile <- readline("Name for the setup file:\n")
+ }
+ else {
+ outfile <- intrnlbls_objname
+ }
+ }
+
+ uniqueList <- lapply(unique(intrnlbls$vallab), function(uniques) {
+ vars <- sapply(names(intrnlbls$vallab),
+ function(x) {
+ ifelse(length(intrnlbls$vallab[[x]]) == length(uniques),
+ all(names(intrnlbls$vallab[[x]]) == names(uniques)), FALSE)
+ })
+ return(names(vars[vars]))
+ })
+
+
+ if (missing(miss)) {
+ if (trymiss) {
+ miss <- c("DK/NA", "DK/NO", "DK", "NA", "N/A", "N.A.", "Not answered",
+ "Don't know", "(Don't know)", "No answer", "No opinion",
+ "Not applicable", "Not relevant", "Refused", "(Refused)",
+ "Refused / no answer", "(Refused / no answer)",
+ "Can't say", "Don't know / Can't say")
+ }
+ }
+
+
+ if (type == "SPSS" | type == "all") {
+ intrnlbls2 <- intrnlbls
+ printMISSING <- FALSE
+
+ if (!file.exists("Setup files")) {
+ dir.create("Setup files")
+ }
+
+ if (!file.exists(file.path("Setup files", "SPSS"))) {
+ dir.create(file.path("Setup files", "SPSS"))
+ }
+
+
+ currentdir <- getwd()
+ setwd(file.path("Setup files", "SPSS"))
+ sink(ifelse(length(grep("\\.sps", outfile)) > 0, outfile, paste(outfile, ".sps", sep="")))
+
+ cat("* ------------------------------------------------------------------------------", enter, enter,
+ "* --- CONFIGURATION SECTION - START ---", enter, enter, enter, sep="")
+
+ if (formats) {
+ cat("* The following command should contain the complete path and", enter,
+ "* name of the .csv file to be read (e.g. \"C:/CIS 2008/Data/ALL.csv\")", enter,
+ "* Change CSV_DATA_PATH to your filename, below:", enter, enter,
+ "FILE HANDLE csvpath /NAME=\"CSV_DATA_PATH\" .", enter, enter, enter, sep="")
+ }
+
+ cat("* The following command should contain the complete path and", enter,
+ "* name of the .sav file to be saved (e.g. \"C:/CIS 2008/Data/ALL.sav\")", enter,
+ "* Change SAV_DATA_PATH to your filename, below:", enter, enter,
+ "FILE HANDLE savfile /NAME=\"SAV_DATA_PATH\" .", enter, enter, enter,
+ "* --- CONFIGURATION SECTION - END ---", enter, enter,
+ "* ------------------------------------------------------------------------------", enter, enter, enter, enter,
+ "* There should be nothing to change below this line", enter,
+ "* ------------------------------------------------------------------------------", enter, enter, enter, enter, sep="")
+
+ if (formats) {
+ cat("* -------------- Start Definition Macro --------------", enter, enter,
+ "SET LOCALE = 'English' .", enter,
+ "SHOW LOCALE .", enter, enter, # SET DECIMAL = COMMA . * (might be another idea)
+ "* -------------- Read Raw Data --------------", enter, enter,
+ "GET DATA", enter,
+ " /TYPE=TXT", enter,
+ " /FILE=csvpath", enter,
+ " /DELCASE=LINE", enter,
+ " /DELIMITERS=\"", ifelse(delimiter == "\t", "\\t", delimiter), "\"", enter,
[TRUNCATED]
To get the complete diff run:
svnlook diff /svnroot/qca -r 34
More information about the Qca-commits
mailing list