[Genabel-commits] r978 - in pkg/GenABEL: R man

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Thu Oct 11 16:32:30 CEST 2012


Author: nd_0001
Date: 2012-10-11 16:32:29 +0200 (Thu, 11 Oct 2012)
New Revision: 978

Modified:
   pkg/GenABEL/R/GC.R
   pkg/GenABEL/R/GC_ovdom.R
   pkg/GenABEL/R/PGC.R
   pkg/GenABEL/man/GC.Rd
   pkg/GenABEL/man/GC_ovdom.Rd
   pkg/GenABEL/man/PGC.Rd
Log:
Correction of GC functions

Modified: pkg/GenABEL/R/GC.R
===================================================================
--- pkg/GenABEL/R/GC.R	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/R/GC.R	2012-10-11 14:32:29 UTC (rev 978)
@@ -13,10 +13,11 @@
 #' @param x Model of inheritance (0 for recessive,0.5 for additive, 1 for dominant)
 #' @param index.filter Indexes for variables that will be use for analisis in data vector
 #' @param n size of the sample
+#' @param proportion The proportion of lowest P (Chi2) to be used when
+#'   estimating the inflation factor Lambda for "regress" method only
 #' @param clust For developers only
 #' @param vart0 For developers only
 #' @param tmp For developers only
-#' @param min.p For developers only
 #' @param CA For developers only
 #' @param p.table For developers only
 #' 
@@ -32,8 +33,6 @@
 #' 
 #' @examples
 #' data(ge03d2)
-#' set.seed(1)
-#' ge03d2 <- ge03d2[sample(1:nids(ge03d2),200),1:1000]
 #' qts=mlreg(phdata(ge03d2)$dm2~1,data=ge03d2,gtmode = "dominant")
 #' chi2.1df=results(qts)$chi2.1df
 #' s=summary(ge03d2)
@@ -43,11 +42,12 @@
 #' @keywords htest
 #'
 
-GC = function(data=0,p,x,method = "regress",clust=0,vart0=0,tmp=0,min.p=0.05,CA=FALSE,index.filter=0,n,p.table=0){
+GC = function(data,p,x,method = "regress",n,index.filter=NULL,proportion=1,clust=0,vart0=0,tmp=0,CA=FALSE,p.table=0){
 #
-	if (length(index.filter)<=1){
-			ind.function=(1:length(data))
-		} else ind.function=index.filter
+	if (is.null(index.filter)){
+		ind.function=which(!is.na(data))
+	} else ind.function=index.filter
+	ind.function=ind.function[which(!is.na(data[ind.function]))]
 	#
 	if (!(method=="regress" | method=="median" | method=="ks.test")){
 		print("Error. I do not know this method");
@@ -55,8 +55,6 @@
 	}
 	#
 	if (CA){
-		ololo=min.p
-
 		p.table[1,1,]->p00
 		p.table[1,2,]->p01
 		p.table[1,3,]->p02
@@ -71,7 +69,6 @@
 		p.table[3,2,]->p1
 		p.table[3,3,]->p2
 		p.table[3,4,]->n
-		
 		Zx=0;
 		exeps=0;
 		Dx=(p12-p02)+x*(p11-p01);
@@ -82,7 +79,6 @@
 			Zx[i]=((Dx[i])^2)/(vart); exeps[i]=F}
 			i=i+1;
 		}
-
 		#Zx=CASforVIF(p.table,p,x)
 		inf=which(!is.na(Zx));
 		notinf=which(is.na(Zx));
@@ -139,6 +135,7 @@
 		vector_vif=VIF(p,n,F,K);
 		Zxl=Zx/vector_vif;
 		Zxl=sort(Zxl[ind.function]);
+		Zxl_r=Zxl[1:ntp]
 		if (method=="ks.test"){
 		dMedian=-log(ks.test(Zxl,"pchisq",df=1)$p.value);
 		}
@@ -146,7 +143,7 @@
 		dMedian=abs(qchisq(.5,1)-median(Zxl));
 		}
 		if (method=="regress")
-		{dMedian=sum((Zxl-Chi2)^2);}
+		{dMedian=sum((Zxl_r-Chi2)^2);}
 		#}
 		return(1*dMedian)
 	}
@@ -159,6 +156,7 @@
 		vector_vif=VIF(p,n,F,K);
 		Zxl=Zx/vector_vif;
 		Zxl=sort(Zxl[ind.function]);
+		Zxl_r=Zxl[1:ntp]
 		if (method=="ks.test"){
 		dMedian=-log(ks.test(Zxl,"pchisq",df=1)$p.value);
 		}
@@ -166,45 +164,36 @@
 		dMedian=abs(qchisq(.5,1)-median(Zxl));
 		}
 		if (method=="regress")
-		{dMedian=sum((Zxl-Chi2)^2);}
+		{dMedian=sum((Zxl_r-Chi2)^2);}
 		#}
 		return(1*dMedian)
 	}
 
 	###
-		#Dx=(p12-p02)+x*(p11-p01);
-		F=0.5; K=n[1];
-	#
-	#i=1;
-	#j=0;
-	#	Zx=0;
-	#	while (i<=length(n)){
-	#		vart=((p2[i]+p1[i]*x^2)-(p2[i]+x*p1[i])^2)*((1/n1[i])+(1/n0[i]));
-	#		if (vart==0){j=j+1; Zx[i]=NA;} else{Zx[i]=Dx[i]^2/vart;}
-	#		i=i+1;
-	#	}
+	F=0.5; K=n[1];
 
-
-	#length(Zx)==length(inf)+length(notinf)
-	#N=length(n);
-	#if (j!=0){N=N-j;}
-	N_inf=sum(!is.na(data[ind.function]))
-	Chi2=(0:(N_inf-1))/N_inf;
-	Chi2=qchisq(Chi2,1)
-			#dta <- sort(Zx)
-			#ppp <- ppoints(dta)
-			#Chi2=qchisq(ppp,1);
-
-			#delta=1/N;
-			#x1=0;
-			#i=1;
-			#Chi2=0;
-			#while (i<=N){
-			#	x2=x1+(delta/2);
-			#	Chi2[i]=qchisq(x2,1);
-			#	x1=x1+delta;
-			#	i=i+1;
-			#}
+	#N_inf=sum(!is.na(data[ind.function]))
+	#Chi2=(0:(N_inf-1))/N_inf;
+	#Chi2=qchisq(Chi2,1)
+	data_p <- data[ind.function]
+	if (proportion>1.0 || proportion<=0) 
+		stop("proportion argument should be greater then zero and less than or equal to one")
+	ntp <- round(proportion*length(data_p))
+	if (ntp<1) stop("no valid measurments")
+	if (ntp==1) {
+		warning(paste("One measurment, Lambda = 1 returned"))
+		return(list(estimate=1.0,se=999.99))
+	}
+	if (ntp<10) warning(paste("number of points is too small:",ntp))
+	if (min(data_p)<0) stop("data argument has values <0")
+	if (max(data_p)<=1) {
+		data_p<- qchisq(data_p,1,lower.tail=FALSE)
+	}
+		
+	data_p <- sort(data_p)
+	ppoi <- ppoints(data_p)
+	Chi2 <- sort(qchisq(1-ppoi,1))
+	Chi2 <- Chi2[1:ntp]
 	#####
 	if (clust==1){
 	data.mds0 <- Clust(0)

Modified: pkg/GenABEL/R/GC_ovdom.R
===================================================================
--- pkg/GenABEL/R/GC_ovdom.R	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/R/GC_ovdom.R	2012-10-11 14:32:29 UTC (rev 978)
@@ -12,6 +12,8 @@
 #' @param p Input vector of allele frequencies
 #' @param index.filter Indexes for variables that will be use for analisis in data vector
 #' @param n size of the sample
+#' @param proportion The proportion of lowest P (Chi2) to be used when
+#'   estimating the inflation factor Lambda for "regress" method only
 #' @param clust For developers only
 #' @param vart0 For developers only
 #' @param tmp For developers only
@@ -28,26 +30,21 @@
 #' 
 #' @examples
 #' data(ge03d2)
-#' set.seed(2)
-#' ge03d2 <- ge03d2[sample(1:nids(ge03d2),250),1:1500]
 #' qts=mlreg(phdata(ge03d2)$dm2~1,data=ge03d2,gtmode = "overdominant")
 #' chi2.1df=results(qts)$chi2.1df
 #' s=summary(ge03d2)
 #' freq=s$Q.2
-#' ### donotrun
-#' \dontrun{
 #' result=GC_ovdom(p=freq,method = "median",data=chi2.1df,n=nids(ge03d2))
-#' }
-#' ### end norun
 #' 
 #' @keywords htest
 #'
 
-GC_ovdom = function(p,method = "regress",clust=0,vart0=0,tmp=0,data=0,index.filter=0,n){
+GC_ovdom = function(data,p,method = "regress",n,index.filter=NULL,proportion=1,clust=0,vart0=0,tmp=0){
 	#
-	if (length(index.filter)<=1){
-			ind.function=(1:length(data))
-		} else ind.function=index.filter
+	if (is.null(index.filter)){
+		ind.function=which(!is.na(data))
+	} else ind.function=index.filter
+	ind.function=ind.function[which(!is.na(data[ind.function]))]
 	#
 	if (!(method=="regress" | method=="median" | method=="ks.test")){
 	print("Error. I do not know this method");
@@ -103,6 +100,7 @@
 		vector_vif=VIF(p,n,F,K);
 		Zxl=Zx/vector_vif;
 		Zxl=sort(Zxl[ind.function]);
+		Zxl_r=Zxl[1:ntp]
 		if (method=="ks.test"){
 		dMedian=-log(ks.test(Zxl,"pchisq",df=1)$p.value);
 		}
@@ -110,7 +108,7 @@
 		dMedian=abs(qchisq(.5,1)-median(Zxl));
 		}
 		if (method=="regress")
-		{dMedian=sum((Zxl-Chi2)^2);}
+		{dMedian=sum((Zxl_r-Chi2)^2);}
 		#}
 		return(1*dMedian)
 	}
@@ -123,6 +121,7 @@
 		vector_vif=VIF(p,n,F,K);
 		Zxl=Zx/vector_vif;
 		Zxl=sort(Zxl[ind.function]);
+		Zxl_r=Zxl[1:ntp]
 		if (method=="ks.test"){
 		dMedian=-log(ks.test(Zxl,"pchisq",df=1)$p.value);
 		}
@@ -130,16 +129,35 @@
 		dMedian=abs(qchisq(.5,1)-median(Zxl));
 		}
 		if (method=="regress")
-		{dMedian=sum((Zxl-Chi2)^2);}
+		{dMedian=sum((Zxl_r-Chi2)^2);}
 		#}
 		return(1*dMedian)
 	}
 
 	F=0.5; K=0.2*n[1];
 	
-	N_inf=sum(!is.na(data[ind.function]))
-	Chi2=(0:(N_inf-1))/N_inf;
-	Chi2=qchisq(Chi2,1)
+	#N_inf=sum(!is.na(data[ind.function]))
+	#Chi2=(0:(N_inf-1))/N_inf;
+	#Chi2=qchisq(Chi2,1)
+	data_p <- data[ind.function]
+	if (proportion>1.0 || proportion<=0) 
+		stop("proportion argument should be greater then zero and less than or equal to one")
+	ntp <- round(proportion*length(data_p))
+	if (ntp<1) stop("no valid measurments")
+	if (ntp==1) {
+		warning(paste("One measurment, Lambda = 1 returned"))
+		return(list(estimate=1.0,se=999.99))
+	}
+	if (ntp<10) warning(paste("number of points is too small:",ntp))
+	if (min(data_p)<0) stop("data argument has values <0")
+	if (max(data_p)<=1) {
+		data_p<- qchisq(data_p,1,lower.tail=FALSE)
+	}
+		
+	data_p <- sort(data_p)
+	ppoi <- ppoints(data_p)
+	Chi2 <- sort(qchisq(1-ppoi,1))
+	Chi2 <- Chi2[1:ntp]
 	x=1;
 	
 	if (clust==1){

Modified: pkg/GenABEL/R/PGC.R
===================================================================
--- pkg/GenABEL/R/PGC.R	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/R/PGC.R	2012-10-11 14:32:29 UTC (rev 978)
@@ -14,8 +14,10 @@
 #' @param pol.d Polinom degree
 #' @param plot If true, functon makes plot of lambda from allele frequencies
 #' @param start.corr For regress method use it only when you want to make calculations faster
-#' @param index.filter Index of variables in data vector, that will be used in analysis, 
+#' @param index.filter Index of variables in data vector, that will be used in analysis 
 #' if zero - all variables will be used
+#' @param proportion The proportion of lowest P (Chi2) to be used when
+#'   estimating the inflation factor Lambda for "regress" method only
 #' 
 #' @return A list with elements
 #' \item{data}{Output vector corrected Chi square statistic}
@@ -34,10 +36,11 @@
 #' @keywords htest
 #' 
 
-PGC = function(data,method="regress",p,df, pol.d=3, plot=TRUE, index.filter=0,start.corr=FALSE){
-	if (length(index.filter)<=1){
-		ind.function=(1:length(data))
+PGC = function(data,method="regress",p,df, pol.d=3, plot=TRUE, index.filter=NULL,start.corr=FALSE,proportion=1){
+	if (is.null(index.filter)){
+		ind.function=which(!is.na(data))
 	} else ind.function=index.filter
+	ind.function=ind.function[which(!is.na(data[ind.function]))]
     pol.m = function(p, b, pol.d){
         out=0;
         for (i in (1:pol.d)){
@@ -54,7 +57,26 @@
      if (start.corr){
           data=data/(1*median(data[ind.function],na.rm=T)/qchisq(.5,df));
     }
-     N=sum(!is.na(data[ind.function]))
+    #N=sum(!is.na(data[ind.function]))
+	data_p <- data[ind.function]
+	if (proportion>1.0 || proportion<=0) 
+		stop("proportion argument should be greater then zero and less than or equal to one")
+	ntp <- round(proportion*length(data_p))
+	if (ntp<1) stop("no valid measurments")
+	if (ntp==1) {
+		warning(paste("One measurment, Lambda = 1 returned"))
+		return(list(estimate=1.0,se=999.99))
+	}
+	if (ntp<10) warning(paste("number of points is too small:",ntp))
+	if (min(data_p)<0) stop("data argument has values <0")
+	if (max(data_p)<=1) {
+		data_p<- qchisq(data_p,1,lower.tail=FALSE)
+	}
+		
+	data_p <- sort(data_p)
+	ppoi <- ppoints(data_p)
+	Chi2 <- sort(qchisq(1-ppoi,df))
+	Chi2 <- Chi2[1:ntp]
      #dta <- sort(data)
      #ppp <- ppoints(dta)
      #Chi2=qchisq(ppp,df);
@@ -69,10 +91,10 @@
     #x1=x1+delta;
     #i=i+1;
     #}
-    Chi2=(0:(N-1))/N;
-    Chi2=qchisq(Chi2,df)
-    
-     ppp=seq(from=min(p),to=max(p),length=1000)
+	
+    #Chi2=(0:(N-1))/N;
+    #Chi2=qchisq(Chi2,df)
+    ppp=seq(from=min(p),to=max(p),length=1000)
     #inf=which(!is.na(data))
     f2 = function(b){
         Zx=data
@@ -95,6 +117,7 @@
         #}
         Zx=Zx/(pol.m(p,b,pol.d))     
         Zxl=sort(Zx[ind.function])
+		Zxl_r=Zxl[1:ntp]
         if (method=="ks.test"){
 			F=-log(ks.test(Zxl,"pchisq",df=df)$p.value);
         }
@@ -102,7 +125,7 @@
 			F=abs(median(Zxl)-qchisq(.5,df))
         }
         if (method=="regress"){
-			F=sum((Zxl-Chi2)^2);
+			F=sum((Zxl_r-Chi2)^2);
         }
         return(F);
     }

Modified: pkg/GenABEL/man/GC.Rd
===================================================================
--- pkg/GenABEL/man/GC.Rd	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/man/GC.Rd	2012-10-11 14:32:29 UTC (rev 978)
@@ -2,9 +2,9 @@
 \alias{GC}
 \title{Genomic control for various model of inheritance using VIF}
 \usage{
-  GC(data = 0, p, x, method = "regress", clust = 0,
-    vart0 = 0, tmp = 0, min.p = 0.05, CA = FALSE,
-    index.filter = 0, n, p.table = 0)
+  GC(data, p, x, method = "regress", n,
+    index.filter = NULL, proportion = 1, clust = 0,
+    vart0 = 0, tmp = 0, CA = FALSE, p.table = 0)
 }
 \arguments{
   \item{data}{Input vector of Chi square statistic}
@@ -22,14 +22,16 @@
 
   \item{n}{size of the sample}
 
+  \item{proportion}{The proportion of lowest P (Chi2) to be
+  used when estimating the inflation factor Lambda for
+  "regress" method only}
+
   \item{clust}{For developers only}
 
   \item{vart0}{For developers only}
 
   \item{tmp}{For developers only}
 
-  \item{min.p}{For developers only}
-
   \item{CA}{For developers only}
 
   \item{p.table}{For developers only}
@@ -49,8 +51,6 @@
 }
 \examples{
 data(ge03d2)
-set.seed(1)
-ge03d2 <- ge03d2[sample(1:nids(ge03d2),200),1:1000]
 qts=mlreg(phdata(ge03d2)$dm2~1,data=ge03d2,gtmode = "dominant")
 chi2.1df=results(qts)$chi2.1df
 s=summary(ge03d2)

Modified: pkg/GenABEL/man/GC_ovdom.Rd
===================================================================
--- pkg/GenABEL/man/GC_ovdom.Rd	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/man/GC_ovdom.Rd	2012-10-11 14:32:29 UTC (rev 978)
@@ -2,8 +2,9 @@
 \alias{GC_ovdom}
 \title{Genomic control for overdimunant model of inheritance using VIF}
 \usage{
-  GC_ovdom(p, method = "regress", clust = 0, vart0 = 0,
-    tmp = 0, data = 0, index.filter = 0, n)
+  GC_ovdom(data, p, method = "regress", n,
+    index.filter = NULL, proportion = 1, clust = 0,
+    vart0 = 0, tmp = 0)
 }
 \arguments{
   \item{data}{Input vector of Chi square statistic}
@@ -18,6 +19,10 @@
 
   \item{n}{size of the sample}
 
+  \item{proportion}{The proportion of lowest P (Chi2) to be
+  used when estimating the inflation factor Lambda for
+  "regress" method only}
+
   \item{clust}{For developers only}
 
   \item{vart0}{For developers only}
@@ -39,18 +44,12 @@
 }
 \examples{
 data(ge03d2)
-set.seed(2)
-ge03d2 <- ge03d2[sample(1:nids(ge03d2),250),1:1500]
 qts=mlreg(phdata(ge03d2)$dm2~1,data=ge03d2,gtmode = "overdominant")
 chi2.1df=results(qts)$chi2.1df
 s=summary(ge03d2)
 freq=s$Q.2
-### donotrun
-\dontrun{
 result=GC_ovdom(p=freq,method = "median",data=chi2.1df,n=nids(ge03d2))
 }
-### end norun
-}
 \author{
   Yakov Tsepilov
 }

Modified: pkg/GenABEL/man/PGC.Rd
===================================================================
--- pkg/GenABEL/man/PGC.Rd	2012-10-11 10:41:09 UTC (rev 977)
+++ pkg/GenABEL/man/PGC.Rd	2012-10-11 14:32:29 UTC (rev 978)
@@ -3,7 +3,8 @@
 \title{Polynomial genomic control}
 \usage{
   PGC(data, method = "regress", p, df, pol.d = 3,
-    plot = TRUE, index.filter = 0, start.corr = FALSE)
+    plot = TRUE, index.filter = NULL, start.corr = FALSE,
+    proportion = 1)
 }
 \arguments{
   \item{data}{Input vector of Chi square statistic}
@@ -24,8 +25,12 @@
   want to make calculations faster}
 
   \item{index.filter}{Index of variables in data vector,
-  that will be used in analysis, if zero - all variables
+  that will be used in analysis if zero - all variables
   will be used}
+
+  \item{proportion}{The proportion of lowest P (Chi2) to be
+  used when estimating the inflation factor Lambda for
+  "regress" method only}
 }
 \value{
   A list with elements \item{data}{Output vector corrected



More information about the Genabel-commits mailing list