[Vinecopula-commits] r91 - pkg/src

noreply at r-forge.r-project.org noreply at r-forge.r-project.org
Mo Mär 30 20:36:33 CEST 2015


Author: ulf
Date: 2015-03-30 20:36:33 +0200 (Mon, 30 Mar 2015)
New Revision: 91

Modified:
   pkg/src/gof.c
Log:
comments for the goodness-of-fit functionality in C

Modified: pkg/src/gof.c
===================================================================
--- pkg/src/gof.c	2015-03-29 17:29:18 UTC (rev 90)
+++ pkg/src/gof.c	2015-03-30 18:36:33 UTC (rev 91)
@@ -174,8 +174,8 @@
 		{
 			for(j=i;j<(dd+tt);j++)
 			{
-				Dprime[kk] = hess[(j+1)+((dd+tt)*i)-1] + der[(j+1)+((dd+tt)*i)-1];
-				D[kk] = D[kk] + (Dprime[kk]/(double)(*T));
+				Dprime[kk] = hess[(j+1)+((dd+tt)*i)-1] + der[(j+1)+((dd+tt)*i)-1];	// D = H+C
+				D[kk] = D[kk] + (Dprime[kk]/(double)(*T));		// Schaetzer, deswegen durch die Anzahl der Beobachtungen teilen
 				kk++;
 			}
 		}
@@ -189,7 +189,7 @@
 		}
 	} 
 	
-	// Nicht fertig, da hier das Problem D%*%solve(V)%*%t(D) zu l?sen ist
+	// Nicht fertig, da hier das Problem D%*%solve(V)%*%t(D) zu loesen ist
 	
 	// Free memory
 	//free(D);
@@ -245,6 +245,20 @@
   else return  1;
 }
 
+////////////////////////////////////////
+// Calculation of the B_j in Berg and Bakken
+// Input:
+// T, d 		dimensions
+// family,...	RVM objects
+// data			observed data
+// vv vv2		side products of log-likelihood calculation (h-function)
+// calcupdate	which h-functions have to be calculated
+// method		numeric value (1=Breymann, 2=Berg, 3=Berg2; see my paper)
+// alpha		power for the Berg function
+//
+// Output:
+// out			sum of transformed data (PIT) (one step for the Breymann, Berg and Berg2 GOF)
+//////////////////////////////////////////
 
 void Bj(int *T, int* d, int* family, int* maxmat, int* matrix, int* condirect, int* conindirect, double* par, double* par2, double* data, 
 		 double* out, double* vv, double* vv2, int* calcupdate, int* method, int *alpha)
@@ -268,7 +282,7 @@
 				ii += 1;
 			}
 			qsort(u[j],*d,sizeof(double),(void *)comp_nums);
-			ZStar(u[j],d,tmp[j]);		//Transformation von Berg and Bakken (2007)
+			ZStar(u[j],d,tmp[j]);		//Transformation von Berg and Bakken (2007); ordered PIT
 		}
 		else	// Im Fall von Breymann ist es besser keine Transformation zu machen
 		{
@@ -284,11 +298,11 @@
 	{
 		for(i=0;i<*d;i++)
 		{
-			if(*method==1)
+			if(*method==1)	//Breymann has the standard norma quantile function squared as transformation function
 				tmp[t][i]=pow(qnorm(tmp[t][i],0.0,1.0,1,0),2);
-			else if(*method==2)
+			else if(*method==2)  // Berg: absolute value of u-0.5
 				tmp[t][i]=fabs(tmp[t][i]-0.5);
-			else if(*method==3)
+			else if(*method==3)  // Berg2: (u-0.5)^alpha
 				tmp[t][i]=pow(tmp[t][i]-0.5,*alpha);
 			
 			out[t]+=tmp[t][i];
@@ -301,6 +315,18 @@
 }
 
 
+/////////////////////////
+// For bootstrap simulate B_j
+//
+// Input:
+// S			test statistic
+// B			number of bootstrap samples
+// T,d			dimensions
+// method		numeric value (1=Breymann, 2=Berg, 3=Berg2; see my paper)
+// alpha		power for the Berg function
+//
+// p			p-value
+
 void SimulateBj(double* S, int *T, int* d, int* B, int* method, int *alpha, double* p)
 {
 	int i=0, t=0, m=0;
@@ -308,7 +334,7 @@
 	tmp = malloc(*d*sizeof(double));
 	ustar = malloc(*d*sizeof(double));
 	
-	GetRNGstate();
+	GetRNGstate();		// random number generator
 	
 	for(t=0;t<*T;t++)
 	{
@@ -350,6 +376,21 @@
 }
 
 
+////////////////////////////////
+// PIT based GOF tests
+// Breymann, Berg, Berg2
+//
+// Input:
+// data with its dimensions
+// some help variables like vv and vv2
+// method		numeric value (1=Breymann, 2=Berg, 3=Berg2; see my paper)
+// alpha		power for the Berg function
+// statisticName	numeric value (1=Anderson-Darling, 2=Kolmogorov-Smirnov, 3=Cramer-von Mises)
+//
+// Output:
+// statistic		test statistic
+/////////////////////////////////
+
 void gofPIT_AD(int *T, int* d, int* family, int* maxmat, int* matrix, int* condirect, int* conindirect, double* par, double* par2, double* data, 
 		 double* statistic, double* vv, double* vv2, int* calcupdate, int* method, int *alpha, int* B, int *statisticName)
 {
@@ -369,17 +410,17 @@
 	Bj(T, d, family, maxmat, matrix, condirect, conindirect, par, par2, data, S, vv, vv2, calcupdate, method, alpha);
 	
 	// Statistic berechnen
-	if(*B==0)
+	if(*B==0)  // if an asymptotic based test statistic should be returned
 	{
 		if(*method==1)
 		{
 			for(t=0;t<*T;t++)
 			{
-				Bhat[t]=pchisq(S[t],*d,1.0,0.0);
+				Bhat[t]=pchisq(S[t],*d,1.0,0.0);  // for Breymann the asymptotic is known (although it is shown that it is not that correct)
 			}
 		}
 		else
-			CumDist(S, T, T, Bhat);
+			CumDist(S, T, T, Bhat);  // for the other two we need the empirical distribution function
 		
 		if(*statisticName==1)		//Anderson-Darling
 			ADtest(Bhat, T, statistic);
@@ -411,6 +452,14 @@
 }
 
 
+///////////////////////////
+// p-value estimation for the PIT based GOF tests
+//
+// Input:	see above
+//
+// Output:
+// pvalue	bootstrapped p-value
+
 void gofPIT_AD_pvalue(int* T, int* d, int* family, int* maxmat, int* matrix, int* condirect, int* conindirect, double* par, double* par2, double* data,
 		double* statistic, double* vv, double* vv2, int* calcupdate, int* method, int* alpha, int* B, double* pvalue, int *statisticName)
 {
@@ -423,15 +472,15 @@
 	bvv = malloc(*d*(*d)*(*T)*sizeof(double));
 	bvv2 = malloc(*d*(*d)*(*T)*sizeof(double));
 
-	for(m=0;m<*B;m++)
+	for(m=0;m<*B;m++)	// B bootstrap steps
 	{
-		MySample(T, T, f);
+		MySample(T, T, f);  // get a sample from my data
 		for(t=0;t<*T;t++)
 		{
 			for(i=0;i<*d;i++)
 			{
 				bdata[(t+1)+(*T*i)-1]=data[(f[t])+(*T*i)-1];
-				// Forget to change vv annd vv2, too
+				// Forget to change vv and vv2, too
 				for(j=0; j<*d; j++)
 				{
 					bvv[(i+1)+(*d)*j+(*d)*(*d)*t-1] = vv[(i+1)+(*d)*j+(*d)*(*d)*(f[t]-1)-1];	// f[t]-1 because C starts to count at 0
@@ -454,8 +503,15 @@
 }
 
 
-
+/////////////////////////////////////////////
 /* Equal probability sampling; with-replacement case */
+// Input:
+// k		how many samples
+// n		max value of sample 
+//
+// output:
+// y		vector of length k returning the samples
+///////////////////////////////////////
 
 void MySample(int *k, int *n, int *y)
 {
@@ -471,9 +527,18 @@
 
 
 ////////////////////////////////////////////////////////////////
+// GOF test based on empirical copula process
+//
+// Input:
+// data				data
+// t,d				dimensions
+// family,...		RVM object
+// statisticName	numerical value (2=Kolmogorov-Smirnov, 3=Cramer-von Mises)
+//
+// Output:
+// statistic		test statistic
+////////////////////////////////////////////
 
-// gof-test based on empirical copula process
-
 void gofECP(int* T, int* d, int* family, int* maxmat, int* matrix, int* conindirect, double* par, double* par2, double* data, double* statistic, int* statisticName)
 {
 	double *znull, *Chat1, *Chat2, U=0;
@@ -493,7 +558,7 @@
 	SimulateRVine(&T2, d, family, maxmat, matrix, conindirect, par, par2, znull, &U, &takeU);
 	
 	
-	ChatZj(data, data, T, d, T, Chat1);
+	ChatZj(data, data, T, d, T, Chat1);		// empirical copula distribution
 	ChatZj(znull, data, T, d, &T2, Chat2);
 	
 	*statistic=0;
@@ -519,6 +584,10 @@
 }
 
 
+///////////////////////////
+// estimate p-value for ECP based GOF tests
+//////////////////////////////
+
 void gofECP_pvalue(int* T, int* d, int* family, int* maxmat, int* matrix, int* conindirect, double* par, double* par2, double* data, double* statistic, int* statisticName, double* pvalue, int* B)
 {
 	int i=0, m=0, t=0, *f;
@@ -548,10 +617,15 @@
 	free(bdata);
 }
 
-
+////////////////////////////
+// Empirical copula distribution
+//
+// Input:
 // n = dim(u)[1]
 // m = dim(data)[1]
-// Chat vector of length n
+//
+// Output:
+// Chat		empirical copula distribution
 
 void ChatZj(double* data, double* u, int* n, int* d, int* m, double* Chat)
 {
@@ -579,6 +653,11 @@
 	free(helpvar);
 }
 
+
+///////////////////////////
+// Copula distribution of the independence copula
+////////////////////////////
+
 void C_ind(double* data, int* n, int* d, double* C)	
 {
 	int t=0, i=0;
@@ -598,6 +677,11 @@
 
 
 
+////////////////////////////
+// GOF test based on ECP and PIT (ECP2)
+// rest see above
+//////////////////////
+
 void gofECP2(int* T, int* d, int* family, int* maxmat, int* matrix, int* condirect, int* conindirect, double* par, double* par2, double* data, 
 		double* vv, double* vv2, int* calcupdate, double* statistic, int* statisticName)
 {
@@ -647,6 +731,11 @@
 	free(Chat2);
 }
 
+
+///////////////////////////
+// p-value for ECP2
+///////////////////////////
+
 void gofECP2_pvalue(int* T, int* d, int* family, int* maxmat, int* matrix, int* condirect, int* conindirect, double* par, double* par2, double* data, 
 		double* vv, double* vv2, int* calcupdate, double* statistic, double* pvalue, int* statisticName, int* B)
 {



Mehr Informationen über die Mailingliste Vinecopula-commits