[Rqda-commits] r49 - in pkg: . R man

Wed Dec 17 17:31:57 CET 2008

Author: wincent
Date: 2008-12-17 17:31:57 +0100 (Wed, 17 Dec 2008)
New Revision: 49

Modified:
   pkg/ChangeLog
   pkg/R/Coding_Buttons.R
   pkg/R/relation.R
   pkg/R/utils.R
   pkg/TODO
   pkg/man/GetCodingTable.rd
   pkg/man/relation.rd
Log:


Modified: pkg/ChangeLog
===================================================================

--- pkg/ChangeLog	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/ChangeLog	2008-12-17 16:31:57 UTC (rev 49)
@@ -1,3 +1,7 @@
+2008-12-17
+	* Improve the relation function and revise MarkCodeFun accordingly.
+	* New function of CrossTable for inter-relationship between codes (The name of it may change in the future).
+
 2008-12-15
 	* Add help of GetCodingTable.
 	* Fix bugs of "Web Search-Baidu" and "Web Search-Sohu" of the case popup menu (now works for UTF-8 locale).

Modified: pkg/R/Coding_Buttons.R
===================================================================
--- pkg/R/Coding_Buttons.R	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/R/Coding_Buttons.R	2008-12-17 16:31:57 UTC (rev 49)
@@ -117,15 +117,19 @@
           Relations <- apply(Exist,1,FUN=function(x) relation(x[c("selfirst","selend")],c(ans$start,ans$end)))
           Exist$Relation <- sapply(Relations,FUN=function(x)x$Relation)
           if (!any(Exist$Relation=="exact")){
+            ## if they are axact, do nothing; -> if they are not exact, do something.
             Exist$WhichMin <- sapply(Relations,FUN=function(x)x$WhichMin)
             Exist$Start <- sapply(Relations,FUN=function(x)x$UnionIndex[1])
             Exist$End <- sapply(Relations,FUN=function(x)x$UnionIndex[2])
             if (all(Exist$Relation=="proximity")){
               success <- dbWriteTable(.rqda$qdacon,"coding",DAT,row.name=FALSE,append=TRUE)
               if (!success) gmessage("Fail to write to database.")
+              ## if there are no overlap in any kind, just write to database; otherwise, pass to else{}.
             } else {
-              del1 <- Exist$WhichMin==2 & Exist$Relation =="inclusion"; del1[is.na(del1)] <- FALSE
-              del2 <- Exist$Relation =="overlap"; del2[is.na(del2)] <- FALSE
+              del1 <- (Exist$Relation =="inclusion" & any(Exist$WhichMin==2,Exist$WhichMax==2))
+              ## if overlap or inclusion  old nested in new]
+              ## then the original coding should be deleted; then write the new coding to table
+              del2 <- Exist$Relation =="overlap"
               del <- (del1 | del2)
               if (any(del)){
                 Sel <- c(min(Exist$Start[del]), max(Exist$End[del]))

Modified: pkg/R/relation.R
===================================================================
--- pkg/R/relation.R	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/R/relation.R	2008-12-17 16:31:57 UTC (rev 49)
@@ -3,6 +3,7 @@
   ## results:
   ## Relation: type of relation
   ## WhichMin: which argument containts min(c(index1,index2))
+  ## WhichMax: which argument containts max(c(index1,index2))
   ## Distance: The distance between to index when Relation is proximity
   ## the index of the overlap of index1 and index2.
   if ( !is.vector(index1) || !is.vector(index1) ) stop("index1 and index2 must be vector.")
@@ -11,54 +12,81 @@
   if (length(index1)==2 || length(index1)==2){
     Max <- max(c(index1,index2))
     Min <- min(c(index1,index2))
-    ans <- list(Relation=NA,WhichMin=NA,Distance=NA,OverlapIndex=c(NA,NA),UnionIndex=c(NA,NA))
-     if (sum(index1 %in% c(Min,Max))==2 || sum(index2 %in% c(Min,Max))==2) {
-       ans$Relation <- "inclusion"
-       if ((index1[1]==Min && index1[2] == Max)) {
-         ans$WhichMin <- 1
-         ans$OverlapIndex <- index2
-         ans$UnionIndex <- index1
-       }
-       else {
-         ans$WhichMin <- 2
-         ans$OverlapIndex <- index1
-         ans$UnionIndex<- index2
-       }
-       ##if (identical(ans$Index,c(index1,index2)[c(ans$WhichMin*2-1,ans$WhichMin*2)])){
-       ##ans$Relation <- "exact"
-       ##ans$WhichMin <- NULL
-       ##}
-         if (identical(ans$UnionIndex,ans$OverlapIndex)) {
-           ans$Relation <- "exact"
-           ans$WhichMin <- NA
-         }
-     } else {
-       if (min(index1) < min(index2) &&
-           max(index1) > min(index2)) {
-         ans$Relation <- "overlap"
-         ans$WhichMin <- 1
-         ans$OverlapIndex <- c(min(index2),max(index1))
-         ans$UnionIndex <- c(min(index1),max(index2))
-       }
-       if (min(index2) < min(index1) &&
-           max(index2) > min(index1)) {
-         ans$Relation <- "overlap"
-         ans$WhichMin <- 2
-         ans$OverlapIndex<- c(min(index1),max(index2))
-         ans$UnionIndex<- c(min(index2),max(index1))
-       }
-       if (max(index1) <= min(index2)){
-         ans$Relation <- "proximity"
-         ans$WhichMin <- 1
-         ans$Distance <- min(index2) -max(index1)
-       }
-       if (max(index2) <= min(index1)){
-         ans$Relation <- "proximity"
-         ans$WhichMin <- 2
-         ans$Distance <- min(index1) -max(index2)
-       }
-     }
+    ans <- list(Relation=NA,WhichMin=NA,WhichMax=NA, Distance=NA,OverlapIndex=c(NA,NA),UnionIndex=c(NA,NA))
+    ans$WhichMin <- which(c(index1[1],index2[1])==Min)
+    ans$WhichMax <- which(c(index1[2],index2[2])==Max)
+    if (sum(index1 %in% c(Min,Max))==2 || sum(index2 %in% c(Min,Max))==2) {
+      if (length(ans$WhichMin)==2 && length(ans$WhichMax)==2){
+        ans$Relation <- "exact"
+      } else {
+        ans$Relation <- "inclusion"
+        if (intersect(ans$WhichMin,ans$WhichMax)==1) {
+          ans$OverlapIndex <- index2
+          ans$UnionIndex <- index1
+        } else {
+          ans$OverlapIndex <- index1
+          ans$UnionIndex<- index2
+        }
+      }
+    } else {
+      if (min(index1) < min(index2) &&
+          max(index1) > min(index2)) {
+        ans$Relation <- "overlap"
+        ans$OverlapIndex <- c(min(index2),max(index1))
+        ans$UnionIndex <- c(min(index1),max(index2))
+      }
+      if (min(index2) < min(index1) &&
+          max(index2) > min(index1)) {
+        ans$Relation <- "overlap"
+        ans$OverlapIndex<- c(min(index1),max(index2))
+        ans$UnionIndex<- c(min(index2),max(index1))
+      }
+      if (max(index1) <= min(index2)){
+        ans$Relation <- "proximity"
+        ans$Distance <- min(index2) -max(index1)
+      }
+      if (max(index2) <= min(index1)){
+        ans$Relation <- "proximity"
+        ans$Distance <- min(index1) -max(index2)
+      }
+    }
+    if (length(ans$WhichMin)==2) ans$WhichMin <- NA
+    if (length(ans$WhichMax)==2) ans$WhichMax <- NA
     ans
   }
 }
 
+CrossTable <- function(cid1, cid2,data,relation=c("overlap","inclusion","exact","proximity")) 
+{
+  ## cid1 and cid2 is length-1 numeric, represents the id of codes
+  ## data is return by GetCodingTable.
+  ## cid1=1; cid2=2
+  relation <- match.arg(relation)
+  data <- data[data$cid %in% c(cid1,cid2),c("cid","fid","index1","index2")]
+  ans <- 0
+  fidList <- unique(data[data$cid %in% cid1,"fid"])
+  for (fid in fidList) {
+    tmpdat1 <- data[data$fid==fid & data$cid==cid1,,drop=FALSE]
+    tmpdat2 <- data[data$fid==fid & data$cid==cid2,,drop=FALSE]
+    if (nrow(tmpdat2)>0 && nrow(tmpdat1)>0){
+      for(i in seq_len(nrow(tmpdat1))){
+        for(j in seq_len(nrow(tmpdat2))){
+          Relation <- relation(unlist(tmpdat2[j,c("index1","index2")]),unlist(tmpdat1[i,c("index1","index2")]))
+          if (Relation$Relation==relation) {
+            ans <- ans+1
+            ## may add atributes to ans, so to get more information
+          }
+        }
+      }
+    }
+  }
+  ans
+}
+
+#Cross <- function(data=GetCodingTable(),type){
+#cidList <- as.numeric(names(table(data$cid)))
+#ans <- matrix(nrow=length(cidList), ncol=length(cidList),dimnames=list(cidList,cidList))
+#for (
+#CrossTable(1,4,dat)
+#}
+

Modified: pkg/R/utils.R
===================================================================
--- pkg/R/utils.R	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/R/utils.R	2008-12-17 16:31:57 UTC (rev 49)
@@ -65,6 +65,7 @@
    ##         from coding, freecode, source 
    ##         where coding.status==1 and freecode.id=coding.cid and coding.fid=source.id")
    Codings <- dbGetQuery(.rqda$qdacon,"select coding.cid, coding.fid, freecode.name as codename, source.name as filename,
+                                       coding.selfirst as index1, coding.selend as index2,
                                        coding.selend - coding.selfirst as CodingLength
                                       from coding left join freecode on (coding.cid=freecode.id)
                                                   left join source on (coding.fid=source.id) 

Modified: pkg/TODO
===================================================================
--- pkg/TODO	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/TODO	2008-12-17 16:31:57 UTC (rev 49)
@@ -3,5 +3,3 @@
 Merge *rqda from different coders
 
 inter-coder reliability 
-
-export coding to html
\ No newline at end of file

Modified: pkg/man/GetCodingTable.rd
===================================================================
--- pkg/man/GetCodingTable.rd	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/man/GetCodingTable.rd	2008-12-17 16:31:57 UTC (rev 49)
@@ -18,6 +18,8 @@
   \item{codename }{Code name, in accordance with cid}
   \item{filename }{File name, in accordance with fid}
   \item{CodingLength }{The number of characters in the coding}
+  \item{index1}{beginning index of a coding}
+  \item{index2}{end index of a coding}
 }
 \author{ HUANG Ronggui}
 %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }

Modified: pkg/man/relation.rd
===================================================================
--- pkg/man/relation.rd	2008-12-16 01:04:35 UTC (rev 48)
+++ pkg/man/relation.rd	2008-12-17 16:31:57 UTC (rev 49)
@@ -21,7 +21,7 @@
 nor inclusion).
 }
 \value{
-  A 5-element list:
+  A 6-element list:
   \item{Relation}{Length-1 character, standing for the type of
     relation. It may be one of inclusion, overlap, exact or proximity.}
   \item{OverlapIndex}{Length-2 vector, the index of overlapping between
@@ -29,10 +29,10 @@
   \item{UnionIndex}{Length-2 vector, the index of union of the two
     coding indexes. It is c(NA,NA) when relation is proximity.}
   \item{Distance}{Distance of two coding indexes. It is NA when relation is not proximity.}
-  \item{WhichMin}{Is is 1 when index1 has the minimum index, 2 when
-    index2 has the minimum index. When they have the same minimum
-    index,it is 1 when index1 has the max index, it is 2 when index2 has
-    the max index. It is NA when relation is Exact.}
+  \item{WhichMin}{Which argument (index1 or index2) has the minimum
+    value. If both have the same minmum value, return NA.}
+  \item{WhichMax}{Which argument (index1 or index2) has the maximum
+    value. If both have the same maxmum value, return NA.}
 }
 \author{ HUANG Ronggui}
 %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }
@@ -42,7 +42,7 @@
 relation(c(10,40),c(20,80)) # overlap
 relation(c(10,20),c(30,50)) # proximity with distance of 10
 relation(c(10,20),c(10,20)) # exact
-relation(c(10,20),c(10,30)) # WhichMin is 2
+relation(c(10,20),c(10,30)) # WhichMin is c(1,2)
 }
 }
 % Add one or more standard keywords, see file 'KEYWORDS' in the