[R-gregmisc-commits] r2173 - in pkg/gtools: R man
noreply at r-forge.r-project.org
noreply at r-forge.r-project.org
Tue Jun 19 18:43:06 CEST 2018
Author: warnes
Date: 2018-06-19 18:43:05 +0200 (Tue, 19 Jun 2018)
New Revision: 2173
Modified:
pkg/gtools/R/baseOf.R
pkg/gtools/man/baseOf.Rd
Log:
Improvements to baseOf() function.
Modified: pkg/gtools/R/baseOf.R
===================================================================
--- pkg/gtools/R/baseOf.R 2017-08-23 23:03:36 UTC (rev 2172)
+++ pkg/gtools/R/baseOf.R 2018-06-19 16:43:05 UTC (rev 2173)
@@ -1,32 +1,73 @@
+# Transform integer to array of digits in specified base
+baseOf <- function(v,
+ b=10,
+ l=1)
+{
+ if (is.null(v))
+ stop("v is null")
+ if(length(v)==0)
+ return(integer(0))
-# transform base
-#
-# This function rewrites regular integer numbers as an array of its digits.
-# The base of the numbering scheme may be changed away from 10,
-# which defines our decimal system, to any other integer value. For
-# b=2, the number is returned in the dual system. The least significant
-# digit has the highest index in the array, i.e. it appears on the right.
-#
-# v = value of base 10 to be transformed
-# b = new base
-# l = minimal length of returned array (default is 1)
-# return value: array of factors, highest exponent first
-baseOf<-function(v,b=10,l=1) {
- remainder<-v
- i<-l
- ret<-NULL
- while(remainder>0 || i>0) {
- #print(paste("i=",i," remainder=",remainder))
- m<-remainder%%b
- if (is.null(ret)) {
- ret<-m
- }
- else {
- ret<-c(m,ret)
- }
- remainder <- remainder %/% b
- i<-i-1
- }
- return(ret)
+ if(any(as.integer(v) != v))
+ stop("non-integer value(s) provided for v.")
+
+ if (length(v) > 1)
+ {
+ # this returns a list which may have vectors of varying lengths
+ val.list <- lapply(X=v, FUN=baseOf.inner, b=b, l=l)
+ longest <- max(sapply(val.list, length))
+
+ # call again, forcing all elements to have the same length
+ retval <- t(sapply(X=v, FUN=baseOf.inner, b=b, l=longest))
+
+ # add informative row and column names
+ rownames(retval) <- paste0('v.', v)
+ colnames(retval) <- paste0('b.', c(0, b^(1: (longest- 1) ) ) )
+
+ retval
+ }
+ else
+ retval <- baseOf.inner(v=v, b=b, l=l)
+
+ retval
}
-
+
+
+# Transform integer to array of digits in specified base
+baseOf.inner <- function(v,
+ b=10,
+ l=1)
+{
+ if (is.na(v))
+ return(rep(NA, l))
+
+ if(v==0)
+ return(rep(0, l))
+
+ remainder <- v
+ i <- l
+ ret <- NULL
+ while(remainder > 0 || i >0)
+ {
+ #print(paste("i=",i," remainder=",remainder))
+ m <- remainder%%b
+ if (is.null(ret))
+ {
+ ret <- m
+ }
+ else
+ {
+ ret <- c(m,ret)
+ }
+ remainder <- remainder %/% b
+ i <- i-1
+ }
+
+ if(length(ret)>1)
+ names(ret) <- c(0, b^( 1:(length(ret)- 1 ) ) )
+
+ return(ret)
+}
+
+
+
Modified: pkg/gtools/man/baseOf.Rd
===================================================================
--- pkg/gtools/man/baseOf.Rd 2017-08-23 23:03:36 UTC (rev 2172)
+++ pkg/gtools/man/baseOf.Rd 2018-06-19 16:43:05 UTC (rev 2173)
@@ -1,11 +1,11 @@
\name{baseOf}
\alias{baseOf}
-\title{Transform integer to new base}
+\title{Transform an integer to an array of base-n digits}
\description{
-Transforms a given base-10 integer to an array of digits of another base.
+Transform an integer to an array of base-n digits
}
\usage{
-baseOf(v,b=10,l=1)
+baseOf(v, b=10, l=1)
}
\arguments{
\item{v}{
@@ -19,12 +19,33 @@
}
}
\details{
- This function rewrites regular integer numbers as an array of its digits.
+ This function converts each element of an integer vector into an array of its digits.
The base of the numbering scheme may be changed away from 10,
which defines our decimal system, to any other integer value. For
b=2, the number is returned in the dual system. The least significant
digit has the highest index in the array, i.e. it appears on the right.
The highest exponent is at position 1, i.e. left.
+
+ Writing decimal values in another base is very common in computer science.
+ In base 2 in particular, the possible digit values 0 and 1 are often
+ interpreted as logical false or true. At the very interface to
+ electrical engineering, they are indicated by the absence or presence of
+ voltage. When several bit values are transported synchronously,
+ it is common to give every lane of such a data bus a unique 2^x value
+ and to interpret the lanes together as a number in the binary system. To distinguish 256
+ characters one once needed 8 bits (a "byte"). The byte is the common
+ unit in which larger non-printable data is presented.
+ Because of the many non-printable characters and the difficulty for most humans of
+ memorizing an even longer alphabet, a byte is presented as two half bytes ("nibbles")
+ of 4 bits each in hexadecimal notation. Example code is shown below.
+
+ Statisticians are more likely to use bit representations for
+ hashing. A bit set to 1 (TRUE) at e.g. position 2, 9 or 17 is interpreted
+ as the presence of a particular feature combination of a sample.
+ With baseOf, you can refer to the bit combination as a number, which
+ is more easily and more efficiently dealt with than with an array of
+ binary values. The example code presents a counter of combinations of
+ features which may be interpreted as a Venn diagram.
}
\author{Steffen Moeller \email{moeller at debian.org} }
\examples{
@@ -42,6 +63,21 @@
paste(c(0:9,LETTERS)[baseOf(123,16)],collapse="")
# decimal representation but filling leading zeroes
baseOf(123,l=5)
+# and converting that back
+sum(10^(4:0)*baseOf(123,l=5))
+# hashing and a tabular venn diagram derived from it
+m<-matrix(sample(c(FALSE,TRUE),replace=TRUE,size=300),ncol=4)
+colnames(m)<-c("strong","colorful","nice","humorous")
+names(dimnames(m)) <- c("samples","features")
+head(m)
+m.val <- apply(m,1,function(X){return(sum(2^((ncol(m)-1):0)*X))})
+m.val.rle <- rle(sort(m.val))
+m.counts <- cbind(t(baseOf(m.val.rle$value,b=2,l=ncol(m))),
+ m.val.rle$length)
+colnames(m.counts)<- c(colnames(m),"num")
+rownames(m.counts)<- apply(m.counts[,1:ncol(m)],1,paste,collapse="")
+m.counts[1==m.counts[,"nice"]&1==m.counts[,"humorous"],,drop=F]
+m.counts[,"num",drop=T]
}
\keyword{base}
More information about the R-gregmisc-commits
mailing list