[GenABEL-dev] [Genabel-commits] r704 - in pkg/GenABEL: . R inst/unitTests man tests

Yurii Aulchenko yurii.aulchenko at gmail.com
Thu Mar 31 02:00:24 CEST 2011


Dear All,

In this commit I have added patch of Nicola Pirastu + regression
tests. All works fine, R CMD check GenABEL passed. I close bugs
[#1184], [#1185], [#1259]. This is indeed a great patch, which kills
three bugs at once! Many thanks, Nicola!

We should keep in mind to update the topics on forum.genabel.org
adding a message that the problem is fixed in 1.6-6 (now @RForge),
which, when released on CRAN, will contain Nicola's patch.

Nicola, you are the first one to contribute a patch to genabel-devel
list, and I wonder if you would find it possible to draft a small
"HOWTO make and contribute a GenABEL project patch"?

best wishes,
Yurii

Details:

Committing the patch of Nicola Pirastu

http://lists.r-forge.r-project.org/pipermail/genabel-devel/2011-March/000182.html

http://lists.r-forge.r-project.org/pipermail/genabel-devel/2011-March/000182.html


> --- pkg/GenABEL/CHANGES.LOG     2011-03-30 16:33:24 UTC (rev 703)
> +++ pkg/GenABEL/CHANGES.LOG     2011-03-30 23:54:07 UTC (rev 704)
> @@ -1,5 +1,13 @@
> -*** v. 1.6-6 (2011.03.22)
> +*** v. 1.6-6 (2011.03.31)
>
> +Applied the patch of Nicola Pirastu
> +http://lists.r-forge.r-project.org/pipermail/genabel-devel/2011-March/000182.html
> +to descriptives.trait. Added RUnit regression tests, updated
> +documentation. Bugs fixed: [#1184], [#1185], [#1259]
> +

> Modified: pkg/GenABEL/R/descriptives.trait.R
> ===================================================================
> --- pkg/GenABEL/R/descriptives.trait.R  2011-03-30 16:33:24 UTC (rev 703)
> +++ pkg/GenABEL/R/descriptives.trait.R  2011-03-30 23:54:07 UTC (rev 704)
> @@ -1,60 +1,96 @@
> -"descriptives.trait" <-
> -function (data,subset,file,by.var=NULL,digits=3) {
> -       if (missing(data)) stop("data argument must be provided")
> -       if (is(data,"gwaa.data")) data <- data at phdata
> -       if (!is(data,"data.frame")) stop("data argument must be of gwaa.data or data.frame-class")
> -       len <- dim(data)[1]
> -       ntra <- dim(data)[2]
> -       if (!missing(subset)) data <- data[subset,]
> -       if (!is.null(by.var)) {
> -               svar <- by.var
> -               if (length(levels(factor(svar)))!=2) stop("The by.var argument should contain a binary variable")
> -               out <- matrix(data=NA,nrow=ntra,ncol=9)
> -               lvls <- levels(factor(svar))
> -               for (i in (1:ntra)) {
> -                       ctrao <- data[,i]
> -                       if (!is.numeric(ctrao) | all(ctrao==svar)) {
> -                               ctra <- ctrao[which(svar == lvls[1])]
> -                               out[i,1] <- length(ctra) - sum(is.na(ctra))
> -                               ctra <- ctrao[which(svar == lvls[2])]
> -                               out[i,4] <- length(ctra) - sum(is.na(ctra))
> -                       } else {
> -                               ctra <- ctrao[which(svar == lvls[1])]
> -                               out[i,1] <- length(ctra) - sum(is.na(ctra))
> -                               out[i,2] <- mean(ctra,na.rm=TRUE)
> -                               out[i,3] <- sd(ctra,na.rm=TRUE)
> -                               ctra <- ctrao[which(svar == lvls[2])]
> -                               out[i,4] <- length(ctra) - sum(is.na(ctra))
> -                               out[i,5] <- mean(ctra,na.rm=TRUE)
> -                               out[i,6] <- sd(ctra,na.rm=TRUE)
> -                               out[i,7] <- t.test(ctrao ~ svar)$p.value
> -                               out[i,8] <- kruskal.test(ctrao ~ svar)$p.value
> -                               clv <- length(unique(ctrao))
> -                               if (clv>1 & clv<5) out[i,9] <- fisher.test(ctrao,svar)$p.value
> -                       }
> -               }
> -       } else {
> -               out <- matrix(data=NA,nrow=ntra,ncol=3)
> -               for (i in (1:ntra)) {
> -                       ctra <- data[,i]
> -                       out[i,1] <- length(ctra) - sum(is.na(ctra))
> -                       if (!is.numeric(ctra)) next;
> -                       out[i,2] <- mean(ctra,na.rm=TRUE)
> -                       out[i,3] <- sd(ctra,na.rm=TRUE)
> -               }
> -       }
> -       out <- round(out,digits=digits)
> -       out <- data.frame(out)
> -       rownames(out) <- colnames(data)
> -       if (is.null(by.var))
> -               colnames(out) <- c("No","Mean","SD")
> -       else
> -               colnames(out) <- c(paste("No(by.var=",lvls[1],")",sep=""),"Mean","SD",paste("No(by.var=",lvls[2],")",sep=""),"Mean","SD","Ptt","Pkw","Pexact")
> -       if (!missing(file)) {
> -               cat("\t",file=file,sep="")
> -               cat(colnames(out),file=file,sep="\t",append=TRUE)
> -               cat("\n",file=file,sep="",append=TRUE)
> -               write.table(out,file=file,sep="\t",append=T,col.names=FALSE)
> -       }
> -       out
> +descriptives.trait <- function (data, subset, file, by.var = NULL, digits = 3)
> +{
> +    if (missing(data))
> +        stop("data argument must be provided")
> +    if (is(data, "gwaa.data"))
> +        data <- data at phdata
> +    if (!is(data, "data.frame"))
> +        stop("data argument must be of gwaa.data or data.frame-class")
> +    len <- dim(data)[1]
> +    ntra <- dim(data)[2]
> +    if (!missing(subset))
> +        data <- data[subset, ]
> +    if (!is.null(by.var)) {
> +       svar <- by.var
> +        if(class(by.var)=="formula"){
> +           svar<-data[,as.character(by.var)[2]]
> +        }
> +       if(is.character(by.var) & length(by.var==1)){
> +          svar<-data[,by.var]
> +       }
> +        if (length(levels(factor(svar))) != 2)
> +            stop("The by.var argument should contain a binary variable")
> +        out <- matrix(data = NA, nrow = ntra, ncol = 9)
> +        lvls <- levels(factor(svar))
> +        for (i in (1:ntra)) {
> +            ctrao <- data[, i]
> +            if (!is.numeric(ctrao) | all(ctrao == svar,na.rm=T)) {
> +                ctra <- ctrao[svar == lvls[1]]
> +                out[i, 1] <- length(ctra) - sum(is.na(ctra))
> +                ctra <- ctrao[svar == lvls[2]]
> +                out[i, 4] <- length(ctra) - sum(is.na(ctra))
> +            }
> +            else {
> +                ctra <- ctrao[svar == lvls[1]]
> +                out[i, 1] <- length(ctra) - sum(is.na(ctra))
> +                out[i, 2] <- mean(ctra, na.rm = TRUE)
> +                out[i, 3] <- sd(ctra, na.rm = TRUE)
> +                ctra <- ctrao[svar == lvls[2]]
> +                out[i, 4] <- length(ctra) - sum(is.na(ctra))
> +                out[i, 5] <- mean(ctra, na.rm = TRUE)
> +                out[i, 6] <- sd(ctra, na.rm = TRUE)
> +                tmp<-try(t.test(ctrao ~ svar)$p.value)
> +                if(class(tmp)=="numeric"){
> +                   out[i, 7] <- tmp
> +                }
> +                tmp<-try(kruskal.test(ctrao ~ svar)$p.value)
> +                if(class(tmp)=="numeric"){
> +                   out[i, 8] <- tmp
> +                }
> +                clv <- length(unique(ctrao))
> +                if (clv > 1 & clv < 5)
> +                  out[i, 9] <- fisher.test(ctrao, svar)$p.value
> +            }
> +        }
> +    }
> +    else {
> +        out <- matrix(data = NA, nrow = ntra, ncol = 3)
> +        for (i in (1:ntra)) {
> +            ctra <- data[, i]
> +            out[i, 1] <- length(ctra) - sum(is.na(ctra))
> +            if (!is.numeric(ctra))
> +                next
> +            out[i, 2] <- mean(ctra, na.rm = TRUE)
> +            out[i, 3] <- sd(ctra, na.rm = TRUE)
> +        }
> +    }
> +    out <- round(out, digits = digits)
> +    out <- data.frame(out)
> +    rownames(out) <- colnames(data)
> +    if (is.null(by.var))
> +        colnames(out) <- c("No", "Mean", "SD")
> +    else colnames(out) <- c(paste("No(by.var=", lvls[1], ")",
> +        sep = ""), "Mean", "SD", paste("No(by.var=", lvls[2],
> +        ")", sep = ""), "Mean", "SD", "Ptt", "Pkw", "Pexact")
> +    if (!missing(file)) {
> +        cat("\t", file = file, sep = "")
> +        cat(colnames(out), file = file, sep = "\t", append = TRUE)
> +        cat("\n", file = file, sep = "", append = TRUE)
> +        write.table(out, file = file, sep = "\t", append = T,
> +            col.names = FALSE)
> +    }
> +    out
>  }
> +

Added regression tests:


> Added: pkg/GenABEL/inst/unitTests/runit.descriptives.trait.R
> ===================================================================
> --- pkg/GenABEL/inst/unitTests/runit.descriptives.trait.R                               (rev 0)
> +++ pkg/GenABEL/inst/unitTests/runit.descriptives.trait.R       2011-03-30 23:54:07 UTC (rev 704)
> @@ -0,0 +1,46 @@
> +### --- Test setup ---
> +#
> +# regression tests
> +#
> +
> +if(FALSE) {
> +    ## Not really needed, but can be handy when writing tests
> +    library(RUnit)
> +    library(GenABEL)
> +}
> +
> +### do not run
> +#stop("SKIP THIS TEST")
> +###
> +
> +### ---- common functions and data -----
> +
> +#source(paste("../inst/unitTests/shared_functions.R"))
> +#source(paste(path,"/shared_functions.R",sep=""))
> +
> +### --- Test functions ---
> +
> +test.descriptives.trait <- function()
> +{
> +    data(ge03d2ex)
> +# this works
> +    descriptives.trait(ge03d2ex,by.var=phdata(ge03d2ex)$sex)
> +# bug [#1184]
> +# and this does not!
> +    checkException(descriptives.trait(ge03d2ex,by.var=sex))
> +    attach(phdata(ge03d2ex))
> +    descriptives.trait(ge03d2ex,by.var=sex)
> +    detach(phdata(ge03d2ex))
> +    descriptives.trait(ge03d2ex,by.var="sex")
> +    phdata(ge03d2ex)$sex[2] <- NA
> +# bug [#1185]
> +# and this does not!
> +    descriptives.trait(ge03d2ex,by.var=phdata(ge03d2ex)$sex)
> +    descriptives.trait(ge03d2ex,by.var="sex")
> +# bug [#1259]
> +    convert.snp.illumina(infile="test_markers", outfile="test.raw")
> +    test = load.gwaa.data(pheno="test_phenos", geno="test.raw", force=T)
> +    attach(phdata(test))
> +    descriptives.trait(data=test, by = bt)
> +    detach(phdata(test))
> +}
> \ No newline at end of file
>

Updated documentation:


> Modified: pkg/GenABEL/man/descriptives.trait.Rd
> ===================================================================
> --- pkg/GenABEL/man/descriptives.trait.Rd       2011-03-30 16:33:24 UTC (rev 703)
> +++ pkg/GenABEL/man/descriptives.trait.Rd       2011-03-30 23:54:07 UTC (rev 704)
> @@ -12,7 +12,9 @@
>   \item{subset}{Subset of people to run analysis on.
>                If missing, all people from \code{data} are used for analysis.}
>   \item{file}{A string specifying the name of a file to write the tables to (default is no file otput).}
> -  \item{by.var}{a binary trait; statistics will be produced seprately for the groups and compared}
> +  \item{by.var}{a binary variable or a character scalar specifying the name
> +  of a binary trait in data; statistics will be produced separately for the
> +  groups and compared}
>   \item{digits}{number of digits to be printed}
>  }
>  %\details{
> @@ -31,5 +33,9 @@
>        data(srdta)
>        descriptives.trait(srdta)
>        descriptives.trait(srdta,by.var=srdta at phdata$sex)
> +       descriptives.trait(srdta,by.var="sex")
> +       attach(phdata(srdta))
> +       descriptives.trait(srdta,by.var=sex)
> +       detach(phdata(srdta))
>  }
>  \keyword{distribution}
>


More information about the genabel-devel mailing list