[Archetypes-commits] r17 - in pkg: . inst inst/doc

Thu Apr 23 18:44:01 CEST 2009

Author: manuel
Date: 2009-04-23 18:44:01 +0200 (Thu, 23 Apr 2009)
New Revision: 17

Added:
   pkg/inst/doc/rt-002.pdf
   pkg/inst/doc/rt-003.pdf
Modified:
   pkg/DESCRIPTION
   pkg/inst/CITATION
   pkg/inst/doc/archetypes.Rnw
   pkg/inst/doc/archetypes.bib
Log:
1.0 version of vignette.

Modified: pkg/DESCRIPTION
===================================================================

--- pkg/DESCRIPTION	2009-04-20 15:30:08 UTC (rev 16)
+++ pkg/DESCRIPTION	2009-04-23 16:44:01 UTC (rev 17)
@@ -2,7 +2,7 @@
 Type: Package
 Title: Archetypal Analysis
 Version: 1.0
-Date: 2009-04-20
+Date: 2009-04-23
 Depends: nnls (>= 1.1)
 Suggests: MASS, vcd
 Author: Manuel J. A. Eugster <manuel.eugster at stat.uni-muenchen.de>
@@ -15,4 +15,4 @@
   "good" help pages for the everyday use of a package; we refer to the
   vignette for a consistent package illustration.
 License: GPL (>= 2)
-Revision: 16
+Revision: 17

Modified: pkg/inst/CITATION
===================================================================
--- pkg/inst/CITATION	2009-04-20 15:30:08 UTC (rev 16)
+++ pkg/inst/CITATION	2009-04-23 16:44:01 UTC (rev 17)
@@ -1,12 +1,19 @@
 citHeader("To cite package archetypes in publications use:")
 
-citEntry(entry="TechReport",
-	title="{Spider-Man, the Child and the Trickster} -- Archetypal Analsis in {R}",
-	author=personList(person(first="Manuel J. A.", last="Eugster"),
-		person(first="Friedrich", last="Leisch")),
-	year="2008",
-	number="44",
-	url="http://epub.ub.uni-muenchen.de/8270/",
-	textVersion=paste("Manuel J. A. Eugster and Friedrich Leisch. Spider-Man,",
-		"the Child and the Trickster -- Archetypal Analysis in R.",
-		"Technical Report 44, LMU Muenchen, 2008."))
+citEntry(entry="Article",
+	title="{From {S}pider-{M}an to {H}ero -- Archetypal Analsis in {R}",
+	author=personList(as.person("Manuel J. A. Eugster"),
+		as.person("Friedrich Leisch")),
+	journal="Journal of Statistical Software",
+	year="2009",
+	volume="30",
+	number="8",
+	pages="1--24",
+	url="http://www.jstatsoft.org/v30/i08/",
+	
+	textVersion=paste("Manuel J. A. Eugster and Friedrich Leisch.",
+		"From Spider-Man to Hero -- Archetypal Analysis in R.",
+		"Journal of Statistical Software, 30(8), 1-24, 2009.",
+		"http://www.jstatsoft.org/v30/i08/")
+)
+

Modified: pkg/inst/doc/archetypes.Rnw
===================================================================
--- pkg/inst/doc/archetypes.Rnw	2009-04-20 15:30:08 UTC (rev 16)
+++ pkg/inst/doc/archetypes.Rnw	2009-04-23 16:44:01 UTC (rev 17)
@@ -4,123 +4,124 @@
 \usepackage{amsmath}
 \usepackage{float}
 
-\renewcommand{\caption}[1]{}
+\clubpenalty = 10000
+\widowpenalty = 10000 
+\displaywidowpenalty = 10000
 
 \author{Manuel J. A. Eugster\\{\small
 		Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen} \And
         Friedrich Leisch\\{\small Ludwig-Maximilians-Universit{\"a}t
 		M{\"u}nchen}}
 
-\title{{\it Spider-Man, the Child, and the Trickster$^{1}$} --\\
+\title{From Spider-Man to Hero --\\
 	Archetypal Analysis in \proglang{R}}
 
 \Plainauthor{Manuel J. A. Eugster, Friedrich Leisch}
-
-\Plaintitle{Spider-Man, the Child and the Trickster -- Archetypal
-	Analsis in R}
-
-\Shorttitle{Archetypal Analysis in R}
-
+\Plaintitle{From Spider-Man to Hero -- Archetypal Analysis in R}
+\Shorttitle{Archetypal Analysis in \proglang{R}}
 \Keywords{archetypal analysis, convex hull, \proglang{R}}
-
 \Plainkeywords{archetypal analysis, convex hull, R}
 
 \Abstract{
-\begin{center}
-  This article is a (slightly) modified version of
-  \cite{Eugster+Leisch at 2008}.
-\end{center}
-\bigskip
+This introduction to the \proglang{R} package \pkg{archetypes} is a
+(slightly) modified version of \citet{Eugster+Leisch at 2009}, published
+in the {\it Journal of Statistical Software}.
 
 Archetypal analysis has the aim to represent observations in a
 multivariate data set as convex combinations of extremal points. This
 approach was introduced by \citet{Cutler+Breiman at 1994}; they defined
 the concrete problem, laid out the theoretical foundations and
-presented an algorithm written in Fortran, which is available on
-request. In this paper we present the R package \pkg{archetypes} which
-is available on the Comprehensive R Archive Network. The package
+presented an algorithm written in \proglang{Fortran}. In this paper we
+present the \proglang{R} package \pkg{archetypes} which is available
+on the Comprehensive \proglang{R} Archive Network. The package
 provides an implementation of the archetypal analysis algorithm within
-R and different exploratory tools to analyze the algorithm during its
-execution and its final result. The application of the package is
-demonstrated on two examples.
+\proglang{R} and different exploratory tools to analyze the algorithm
+during its execution and its final result. The application of the
+package is demonstrated on two examples.
 }
 
 \Address{
   Manuel J. A. Eugster\\
   Department of Statistics\\
-  Ludwig-Maximilans-Universit{\"a}t M{\"u}nchen\\
+  Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen\\
   80539 Munich, Germany\\
   E-mail: \email{Manuel.Eugster at stat.uni-muenchen.de}\\
   URL: \url{http://www.statistik.lmu.de/~eugster/}
+  
+  \medskip
+  Friedrich Leisch\\
+  Department of Statistics\\
+  Ludwig-Maximilians-Universit{\"a}t M{\"u}nchen\\
+  80539 Munich, Germany\\
+  E-mail: \email{Friedrich.Leisch at R-project.org}\\
+  URL: \url{http://www.statistik.lmu.de/~leisch/}
+
 }
 
-
 %%\usepackage{Sweave} %% already provided by jss.cls
-%%\VignetteIndexEntry{Spider-Man, the Child, and the Trickster -- Archetypal Analysis in R}
+%%\VignetteIndexEntry{From Spider-Man to Hero -- Archetypal Analysis in R}
 %%\VignetteDepends{archetypes}
 %%\VignetteKeywords{archetypal analysis, convex hull, R}
 %%\VignettePackage{archetypes}
 
-
 \SweaveOpts{eps=FALSE, keep.source=TRUE}
 <<echo=FALSE,print=FALSE,results=HIDE>>=
-options(width=80)
-library(archetypes)
+options(width=80, prompt='R> ')
 @ 
 
-
 \begin{document}
 
-\footnotetext[1]{These are examples of archetypes in different
-contexts; see \cite{wikipedia:archetype}.}
-
-
 \section[Introduction]{Introduction\label{intro}}
 
 The \citet{merriam-webster:archetype} defines an archetype as {\it the
-original pattern or model of which all things of the same type are
-representations or copies}. Then, the aim of the archetypal analysis
-is to find ``pure types'', the archetypes, within a set defined in
-a specific context. The concept of archetypes is used in many
-different areas, the set can be defined in terms of literature,
-philosophy, psychology and also statistics. Here, the concrete problem
-is to find a few, not necessarily observed, points (archetypes) in a
-set of multivariate observations such that all the data can be well
-represented as convex combinations of the archetypes. 
+  original pattern or model of which all things of the same type are
+  representations or copies}. The aim of archetypal analysis is to
+find ``pure types'', the archetypes, within a set defined in a
+specific context. The concept of archetypes is used in many different
+areas, the set can be defined in terms of literature, philosophy,
+psychology and also statistics. Here, the concrete problem is to find
+a few, not necessarily observed, points (archetypes) in a set of
+multivariate observations such that all the data can be well
+represented as convex combinations of the archetypes. The title of
+this article illustrates the concept of archetypes on the basis of
+archetypes in literature: the {\it Spider-Man} personality belongs to
+the generic {\it Hero} archetype, and archetypal analysis tries to
+find this coherence.
 
 In statistics archetypal analysis was first introduced by
-\citet{Cutler+Breiman at 1994}. In their paper they laid out the 
+\citet{Cutler+Breiman at 1994}. In their paper they laid out the
 theoretical foundations, defined the concrete problem as a nonlinear
-least squares problem and presented an alternating minimizing algorithm
-to solve it. It has found applications in different areas, with
-recently grown popularity in economics, e.g.,
+least squares problem and presented an alternating minimizing
+algorithm to solve it. It has found applications in different areas,
+with recently grown popularity in economics, e.g.,
 \citet{Li+Wang+Louviere+Carson at 2003} and
-\citet{Porzio+Ragozini+Vistocco at 2006}. In spite of the rising interest
+\citet{Porzio+Ragozini+Vistocco at 2008}. In spite of the rising interest
 in this computer-intensive but numerically sensitive method, no
 ``easy-to-use'' and freely available software package has been
-developed yet; the only implementation is the original Fortran code by
-\citet{Cutler+Breiman at 1994} which is available upon request only. In
-this paper we present the software package \pkg{archetypes} within the
-\proglang{R} statistical environment \citep{R} which provides an
-implementation of the archetypal analysis algorithm. Additionally, the
-package provides exploratory tools to visualize the algorithm during
-the minimization steps and its final result. The newest released
-version of \pkg{archetypes} is always available from the Comprehensive
-R Archive Network at
+developed yet. In this paper we present the software package
+\pkg{archetypes} within the \proglang{R} statistical environment
+\citep{R} which provides an implementation of the archetypal analysis
+algorithm. Additionally, the package provides exploratory tools to
+visualize the algorithm during the minimization steps and its final
+result. The newest released version of \pkg{archetypes} is always
+available from the Comprehensive R Archive Network at
 \url{http://CRAN.R-project.org/package=archetypes}.
 
-The paper is organized as follows: In Section \ref{algorithm} we
+The paper is organized as follows: In Section~\ref{algorithm} we
 outline the archetypal analysis with its different conceptual
 parts. We present the theoretical background as far as we need it for
 a sound introduction of our implementation; for a complete explanation
-we refer to the original paper. Section \ref{pkg} demonstrates how to use
+we refer to the original paper. Section~\ref{pkg} demonstrates how to use
 \pkg{archetypes} based on a simple artificial data set, with details about
-numerical problems and the behavior of the algorithm. In Section \ref{body}
-we show a real word example -- the archetypes of human skeletal diameter 
-measurements. Section \ref{outlook} concludes the article with future
-investigations.
+numerical problems and the behavior of the algorithm. Section~\ref{comp}
+ presents a simulation study to show how the implementation scales
+with numbers of observations, attributes and archetypes. In
+Section~\ref{body} we show a real word example -- the archetypes of
+human skeletal diameter measurements. Section~\ref{outlook} concludes
+the article with future investigations.
 
 
+
 \section[Archetypal analysis]{Archetypal analysis\label{algorithm}}
 
 Given is an $n \times m$ matrix $X$ representing a multivariate data
@@ -130,26 +131,28 @@
 \begin{enumerate}
 	[label=(\arabic{enumi})]
 
-	\item The data are best approximated by combinations of the
+	\item The data are best approximated by convex combinations of the
 	archetypes, i.e., they minimize
 	\[
-		\mbox{RSS} = \|X - Z^T * \alpha\|_2
+		\mbox{RSS} = \|X - Z^{\top} \alpha\|_2
 	\]
 	with $\alpha$, the coefficients of the archetypes, a $k \times n$
 	matrix; the elements are required to be greater equal $0$ and
 	their sum must be $1$, i.e., $\sum_{j=1}^{k} \alpha_{ij} = 1$
-	with $\alpha_{ij} \geq 0$ and $i = 1, \ldots, n$. 
+	with $\alpha_{ij} \geq 0$ and $i = 1, \ldots, n$. $\|\cdot\|_2$ 
+	denotes an appropriate matrix norm.
 
-	\item The archetypes are mixtures of the data points:
+	\item The archetypes are convex combinations of the data points:
 	\[
-		Z = X^T * \beta
+		Z = X^{\top} \beta
 	\]
 	with $\beta$, the coefficients of the data set, a $n \times k$
 	matrix where the elements are required to be greater equal $0$ and
 	their sum must be $1$, i.e., $\sum_{i=1}^{n} \beta_{ji} = 1$
 	with $\beta_{ji} \geq 0$ and $j = 1, \ldots, k$.
 \end{enumerate}
-These two fundamentals define the basic principles of the algorithm:
+These two fundamentals also define the basic principles of the
+estimation algorithm:
 it alternates between finding the best $\alpha$ for given 
 archetypes $Z$ and finding the best archetypes $Z$ for given
 $\alpha$; at each step several convex least squares problems are
@@ -178,24 +181,24 @@
 		the given set of archetypes $Z$: solve $n$ convex least
 		squares problems ($i = 1, \ldots, n$)
 		\[
-			\min_{\alpha_i} \frac{1}{2} \|Z * \alpha_i - X_i \|_2
+			\min_{\alpha_i} \frac{1}{2} \|X_i - Z \alpha_i\|_2
 			\mbox{ subject to} \; \alpha_i \geq 0 \;
-			\mbox{ and } \; \sum_{j=1}^{k} \alpha_{ij} = 1\mbox{,}
+			\mbox{ and } \; \sum_{j=1}^{k} \alpha_{ij} = 1\mbox{.}
 		\]
 		
 		\item\label{alg:loop-zt} Recalculate archetypes $\tilde{Z}$:
-		solve system of linear equations $X = \alpha * \tilde{Z}^T$.
+		solve system of linear equations $X = \alpha \tilde{Z}^{\top}$.
 
 		\item\label{alg:loop-beta} Find best $\beta$ for the
 		given set of archetypes $\tilde{Z}$: solve $k$ convex least
 		squares problems ($j = 1, \ldots, k$)
 		\[
-			\min_{\beta_j} \frac{1}{2} \|X * \beta_j - \tilde{Z}_j \|_2
+			\min_{\beta_j} \frac{1}{2} \|\tilde{Z}_j - X \beta_j\|_2
 			\mbox{ subject to} \; \beta_j \ge 0 \; 
-			\mbox{ and } \; \sum_{i=1}^{n} \beta_{ji} = 1\mbox{,}
+			\mbox{ and } \; \sum_{i=1}^{n} \beta_{ji} = 1\mbox{.}
 		\]
 
-		\item\label{alg:loop-z} Recalculate archetypes $Z$: $Z = X * \beta$.
+		\item\label{alg:loop-z} Recalculate archetypes $Z$: $Z = X \beta$.
 
 		\item\label{alg:loop-rss} Calculate residual sum of squares
 		$\mbox{RSS}$.
@@ -212,46 +215,55 @@
 \ref{alg:loop-alpha} and \ref{alg:loop-beta} several convex
 least squares problems have to be solved. \citet{Cutler+Breiman at 1994}
 use a penalized version of the non-negative least squares algorithm by
-\citet{Lawson+Hanson at 1974}. The penalization is done by adding an
-extra observation, {\it the dummy row}, to $X$ containing a ``huge''
-value $M$ at each element \citep[see, e.g.,][]{Luenberger at 1984}. The
-hugeness of the value $M$ varies from problem to problem and thus can
-be seen as a hyperparameter of the algorithm. Default value in the
-package is $200$.
+\citet{Lawson+Hanson at 1974} \citep[as general reference
+see,e.g.,][]{Luenberger at 1984}. In detail, the problems to solve are of
+the form  $\|u - Tw\|_2$  with $u, w$ vectors and $T$ a matrix, all of
+appropriate dimensions, and the non-negativity and equality
+constraints. The penalized version adds an extra element $M$ to $u$
+and to each observation of $T$; then 
+\[
+	\|u - Tw\|_2 + M^2 \|1 - w\|_2
+\]
+is minimized under non-negativity restrictions. For large
+$M$, the second term dominates and forces the equality constraint to be
+approximately satisfied while maintaining the non-negativity
+constraint. The hugeness of the value $M$ varies from problem to
+problem and thus can be seen as a hyperparameter of the
+algorithm. Default value in the package is $200$.
 
 \paragraph{Solving the system of linear equations:} In Step
 \ref{alg:loop-zt} the system of linear equations
 \[
-	\tilde{Z} = \alpha^{-1} * X
+	\tilde{Z} = \alpha^{-1} X
 \]
 has to be solved. A lot of methods exist, one approach is the
 Moore-Penrose pseudoinverse which provides an approximated unique
-solution by a least square approach: given the pseudoinverse
-$\alpha^+$ of $\alpha$,
+solution by a least squares approach: given the pseudoinverse
+$\alpha^{+}$ of $\alpha$,
 \[
-	\tilde{Z} = \alpha^{+} * X\mbox{,}
+	\tilde{Z} = \alpha^{+} X\mbox{,}
 \]
 is solved. Another approach is the usage of $QR$ decomposition:
 $\alpha = QR$, where $Q$ is an orthogonal and $R$ an upper triangular
 matrix, then
 \[
-	\tilde{Z} = Q^T * X * R^{-1}\mbox{,}
+	\tilde{Z} = Q^{\top} X R^{-1}\mbox{,}
 \]
-is sovled. Default approach in the package is the $QR$ decomposition
+is solved. Default approach in the package is the $QR$ decomposition
 using the \code{solve()} function.
 
 \paragraph{Calculating the residual sum of squares:} In Step
 \ref{alg:loop-rss} the $\mbox{RSS}$ is calculated. It uses the
-spectral norm \citep[see, e.g.,][]{Golub+Loan at 1996}). The spectral
+spectral norm \citep[see, e.g.,][]{Golub+Loan at 1996}. The spectral
 norm of a matrix $X$ is the largest singular value of $X$ or the
-square root of the largest eigenvalue of $X^H X$,
+square root of the largest eigenvalue of $X^{*} X$,
 \[
-	\|X\|_2 = \sqrt{\lambda_{max}(X^H X)}\mbox{,}
+	\|X\|_2 = \sqrt{\lambda_{max}(X^{*} X)}\mbox{,}
 \]
-where $X^H$ is the conjugate transpose of $X$.
+where $X^{*}$ is the conjugate transpose of $X$.
 
-\paragraph{Avoiding local minima:} \citet{Cutler+Breiman at 1994} shows
-that the algorithm converged in all cases, but not necessarily to a
+\paragraph{Avoiding local minima:} \citet{Cutler+Breiman at 1994} show
+that the algorithm converges in all cases, but not necessarily to a
 global minimum. Hence, the algorithm should be started several times
 with different initial archetypes. It is important that these are not
 too close together, this can cause slow convergence or convergence to
@@ -276,47 +288,50 @@
 the following two sections.
 
 
-\section[Using archetypes]{Using \pkg{archetypes}\label{pkg}}
 
-The package is loaded within \proglang{R} using the
+\section[Using archetypes]{Using package \pkg{archetypes}\label{pkg}}
+
+The package is loaded into \proglang{R} using the
 \code{library()} or \code{require()} command:
 <<echo=FALSE>>=
 library(archetypes)
 @ 
 \begin{Schunk}
 \begin{Sinput}
-> library(archetypes)
+R> library("archetypes")
 \end{Sinput}
 \begin{Soutput}
 Loading required package: nnls
 \end{Soutput}
 \end{Schunk}
 It requires the packages \pkg{nnls} \citep{nnls} for solving the
-convex least square problems.
- 
+convex least squares problems.
+
 We use a simple artificial two-dimensional data set to explain the
 usage of the implementation, and the behavior of the archetypal
 analysis at all. The advantage is that we can imagine the results and
 simply visualize them, Section \ref{body} then shows a more realistic
 example. Note that in the following the plot commands do not produce
 exactly the shown graphics concerning primitives coloring, width,
-etc.; due to readability we have reduced the presented commands to the
-significant arguments.
+etc.; to increase readability we have reduced the presented commands
+to the significant arguments. E.g., if we use a different plotting
+symbol in a scatter plot the corresponding (\code{pch = \ldots}) will be
+omitted.
 <<eval=FALSE>>=
-data(toy)
+data("toy")
 plot(toy)
 @ 
-\begin{figure}[H]
+\begin{figure}[h!t]
 	\centering
 \setkeys{Gin}{width=3in}
 <<fig=TRUE,echo=FALSE,width=3,height=3>>=
-data(toy)
+data("toy")
 
-par(mar=c(4,4,0,0)+0.1, ps=9)
-plot(toy, xlab='x', ylab='y', xlim=c(0,20), ylim=c(0,20),
-     pch=19, col=gray(0.7), cex=0.6)
+par(mar = c(2,2,0,0) + 0.1, ps = 9)
+plot(toy, xlab = "", ylab = "", xlim = c(0,20), ylim = c(0,20),
+     pch = 19, col = gray(0.7), cex = 0.6)
 @ 
-	\caption{Two-dimensional artificial data set \code{toy}.}
+	\caption{Data set {\tt toy}.}
 	\label{fig:toy-data}
 \end{figure}
 Data set \code{toy} consists of the two attributes $x$ and $y$, and
@@ -330,12 +345,12 @@
 During the fit, the function reports its improvement and stops after a
 maximum number of iterations (default is \code{maxIterations = 100})
 or if the improvement is less than a defined value (default is
-\code{minImprovement = sqrt(.Machine$double.eps)}). As basis for our %$
+\verb|minImprovement = sqrt(.Machine$double.eps)|). As basis for our %$
 further research, the implementation is a flexible framework where the
 problem solving mechanisms of the individual steps can be
-interchanged. The default values are the ``original ones'' described
+exchanged. The default values are the ``original ones'' described
 in the previous section (\code{family = archetypesFamily()}). The
-result is a S3 \code{archetypes} object,
+result is an \proglang{S}3 \code{archetypes} object,
 <<>>=
 a
 @ 
@@ -348,40 +363,36 @@
 data sets; for higher-dimensional data sets parallel coordinates are
 used.
 <<eval=FALSE>>=
-plot(a, toy, chull=chull(toy))
-plot(a, toy, adata.show=TRUE)
+plot(a, toy, chull = chull(toy))
+plot(a, toy, adata.show = TRUE)
 @ 
-\begin{figure}[H]
+\begin{figure}[h!t]
 	\centering
 \setkeys{Gin}{width=6in}
 <<fig=TRUE,echo=FALSE,width=6,height=3>>=
-par(mfrow=c(1,2), mar=c(4,4,0,0)+0.1, ps=9)
-plot(a, toy, chull=chull(toy), 
-     xlab='x', ylab='y', xlim=c(0,20), ylim=c(0,20), cex=0.6)
-plot(a, toy, adata.show=TRUE, 
-     xlab='x', ylab='y', xlim=c(0,20), ylim=c(0,20), cex=0.6)
+par(mfrow = c(1,2), mar = c(2,2,0,0)+0.1, ps = 9)
+plot(a, toy, chull = chull(toy), 
+     xlab = "", ylab = "", xlim = c(0, 20), ylim = c(0, 20), cex = 0.6)
+plot(a, toy, adata.show = TRUE, 
+     xlab = "", ylab = "", xlim = c(0, 20), ylim = c(0, 20), cex = 0.6)
 @ 
-	\caption{Three archetypes for the \code{toy} data set. The
-	left plot shows the convex hull (black), the archetypes and
-	the approximated convex hull (red).  The right plot shows the
-	approximation of the data points through the archetypes and
-	corresponding $\alpha$ values (green symbols, and grey
-	connection lines).}
+	\caption{Visualization of three archetypes.}
 	\label{fig:toy-a}
 \end{figure}
-The left plot shows the archetypes, their approximation of the
-convex hull (red dots and lines) and the convex hull (black dots and
-lines) of the data. The right plot additionally shows the
-approximation of  the data through the archetypes and the
-corresponding $\alpha$ values (green symbols, and grey connection
-lines); as we can see, all data points outside the approximated convex
-hull are mapped on its boundary. This plot is based on an idea and
-Matlab source code of Bernard Pailthorpe \citep{pc:Pailthorpe}.
+The left plot of Figure~\ref{fig:toy-a} shows the archetypes, their
+approximation of the convex hull (red dots and lines) and the convex
+hull (black dots and lines) of the data. The right plot of
+Figure~\ref{fig:toy-a} additionally shows the approximation of the data
+through the archetypes and the corresponding $\alpha$ values (green
+symbols, and grey connection lines); as we can see, all data points
+outside the approximated convex hull are mapped on its boundary. This
+plot is based on an idea and \proglang{MATLAB} source code of
+\citet{pc:Pailthorpe}.
 
 With \code{saveHistory = TRUE} (which is set per default) each step of
 the execution is saved and we can examine the archetypes in each
 iteration using the \code{ahistory()} command; the initial archetypes,
-for example, are \code{ahistory(a, step=0)}. This can be used to
+for example, are \code{ahistory(a, step = 0)}. This can be used to
 create an ``evolution movie'' of the archetypes,
 <<eval=FALSE>>=
 movieplot(a, toy)
@@ -389,22 +400,25 @@
 \begin{figure}[H]
 	\centering
 \setkeys{Gin}{width=6in}
-<<fig=TRUE,echo=FALSE,width=6,height=5.5>>=
-par(mfrow=c(4,2), mar=c(4,4,0,0)+0.1, ps=9)
-movieplot(a, toy, xlim=c(0,20), ylim=c(0,20), cex=0.6)
+<<fig=TRUE,echo=FALSE,width=6,height=3>>=
+par(mfrow = c(2, 4), mar = c(0, 0, 0, 0) + 0.1, ps = 9)
+movieplot(a, toy, xlim = c(0, 20), ylim = c(0, 20), cex = 0.6, axes = FALSE, 
+          postfn = function(iter) {
+            box()
+            text(1, 19, paste(iter + 1, ".", sep = ""), cex = 1)
+          })
 @ 
-	\caption{The ``evolution movie'' of the three archetypes on
-	\code{toy}. Real animations are as Flash movies available from
-	\url{http://www.statistik.lmu.de/~eugster/archetypes/}.}
+	\caption{Visualization of the algorithm
+          iterations.}
 	\label{fig:toy-movieplot}
 \end{figure}
 The figure shows the plots of the eight steps (the random
 initialization and the seven iterations) from top to  bottom and left
 to right. In each step the three archetypes move further to the three
 corners of the data set. A movie of the approximated data is shown when
-setting parameter \code{show = 'adata'}\footnote{Real animations are
-  as Flash movies available from
-  \url{http://www.statistik.lmu.de/~eugster/archetypes/}.}.
+setting parameter \code{show = "adata"}\footnote{Real animations are
+  as Flash movies along with this paper and available from
+  \url{http://www.jstatsoft.org/v30/i08/}.}.
 
 In the previous section we mentioned that the algorithm should be
 started several times to avoid local minima. This is done using the
@@ -413,9 +427,9 @@
 \code{nrep} which specifies the number of repetitions.
 <<>>=
 set.seed(1986)
-a4 <- stepArchetypes(data=toy, k=3, verbose=FALSE, nrep=4)
+a4 <- stepArchetypes(data = toy, k = 3, verbose = FALSE, nrep = 4)
 @ 
-The result is a S3 \code{stepArchetypes} object,
+The result is an \proglang{S}3 \code{stepArchetypes} object,
 <<>>=
 a4
 @ 
@@ -435,11 +449,11 @@
 	\centering
 \setkeys{Gin}{width=3in}
 <<fig=TRUE,echo=FALSE,width=3,height=3>>=
-par(mar=c(4,4,0,0)+0.1, ps=9)
-plot(a4, toy, cex=0.6, xlim=c(0,20), ylim=c(0,20))
+par(mar = c(2, 2, 0, 0) + 0.1, ps = 9)
+plot(a4, toy, cex = 0.6, xlim = c(0, 20), ylim = c(0, 20),
+     xlab = "", ylab = "")
 @ 
-	\caption{The final three archetypes for four different starts on
-          data set \code{toy} with randomly chosen initial archetypes.}
+	\caption{Visualization of different repetitions.}
 	\label{fig:toy-a4}
 \end{figure}
 However, the model of repetition $3$ has the lowest residual sum of
@@ -448,43 +462,44 @@
 bestModel(a4)
 @ 
 <<echo=FALSE>>=
-print(bestModel(a4), full=FALSE)
+print(bestModel(a4), full = FALSE)
 @ 
 
 At the beginning of the example we decided by looking at the data
-that three archetypes may be a good choice. It is not given that this
-is the right choice, and with higher-dimensional data this is not
-possible at all. As already mentioned in the previous section, one
-simple way to choose the correct number of archetypes is to run the
-algorithm for different numbers of $k$ and use the ``elbow criterion''
-on the residual sum of squares. The \code{stepArchetypes()} function
-allows a vector as value of argument $k$ and executes for each $k_i$
-the \code{archetypes()} function $nrep$ times.
+that three archetypes may be a good choice. Visual inspection does not
+necessarly lead to the best choice, and is not an option for
+higher-dimensional data sets.
+As already mentioned in the previous section, one simple way
+to choose a good number of archetypes is to run the algorithm for
+different numbers of $k$ and use the ``elbow criterion'' on the
+residual sum of squares. The \code{stepArchetypes()} function allows a
+vector as value of argument $k$ and executes for each $k_i$ the
+\code{archetypes()} function $nrep$ times.
 <<echo=FALSE>>=
-file <- 'toy-as.RData'
+file <- "as.RData"
 if ( file.exists(file) ) {
-  load(file=file)
+  load(file = file)
 } else {
   set.seed(1986)
-  as <- stepArchetypes(data=toy, k=1:10, verbose=FALSE, nrep=4)
-  save(as, file=file)
+  as <- stepArchetypes(data = toy, k = 1:10, verbose = FALSE, nrep = 4)
+  save(as, file = file)
 }
 @ 
 \begin{Schunk}
 \begin{Sinput}
-> set.seed(1986)
-> as <- stepArchetypes(data=toy, k=1:10, verbose=FALSE, nrep=4)
+R> set.seed(1986)
+R> as <- stepArchetypes(data = toy, k = 1:10, verbose = FALSE, nrep = 4)
 \end{Sinput}
 \begin{Soutput}
 There were 23 warnings (use warnings() to see)
 \end{Soutput} 
 \end{Schunk}
 The occurred warnings indicate that errors occured during the
-execution, in this case, singular matrizes in solving the linear
+execution, in this case, singular matrices in solving the linear
 equation system in Step \ref{alg:loop-zt} as from $k = 4$:
 \begin{Schunk}
 \begin{Sinput}
-> warnings()
+R> warnings()
 \end{Sinput}
 \begin{Soutput}
 Warnings:
@@ -505,10 +520,14 @@
 <<>>=
 iters(as)
 @ 
-which is an indication for an afflicted random initialisation. But up
-to $k = 5$ there is always at least one start with a meaningful result
-and the residual sum of squares curve of the best models shows that by
-the ``elbow criterion'' three or maybe seven is the best number of
+which is an indication for an afflicted random initialisation. If
+warnings occur within repetitions for $k_i$ archetypes, the pretended
+meaningful solutions (according to the $RSS$) have to be examined
+carefully; Section~\ref{ginv} illustrates a pretended meaningful
+solution where the plot then uncovers problems. But up to $k = 5$
+there is always at least one start with a meaningful result and the
+residual sum of squares curve of the best models shows that by the
+``elbow criterion'' three or maybe seven is the best number of
 archetypes:
 <<eval=FALSE>>=
 screeplot(as)
@@ -517,11 +536,13 @@
 	\centering
 \setkeys{Gin}{width=4in}
 <<fig=TRUE,echo=FALSE,width=4,height=3>>=
-par(mar=c(4,4,0.1,0)+0.1, ps=9)
-screeplot(as, cex=0.6, ylim=c(0, 0.08))
+par(mar = c(3, 4, 0.1, 0) + 0.1, ps = 9)
+screeplot(as, cex = 0.6, ylim = c(0, 0.08), axes = FALSE)
+mtext("Archetypes", side = 1, line = 2)
+axis(2, las = 2)
+box()
 @ 
-	\caption{Residual sum of squares curve of the best models of
-          four repetitions for $p = 1, \ldots, 7$ archetypes.}
+	\caption{Screeplot of the residual sum of squares.}
 	\label{fig:toy-rssplot}
 \end{figure}
 We already have seen the three archetypes in detail; the seven
@@ -529,8 +550,8 @@
 are:
 <<eval=FALSE>>=
 a7 <- bestModel(as[[7]])
-plot(a7, toy, chull=chull(toy))
-plot(a7, toy, adata.show=TRUE)
+plot(a7, toy, chull = chull(toy))
+plot(a7, toy, adata.show = TRUE)
 @ 
 \begin{figure}[H]
 	\centering
@@ -538,41 +559,41 @@
 <<fig=TRUE,echo=FALSE,width=6,height=3>>=
 a7 <- bestModel(as[[7]])
 
-par(mfrow=c(1,2), mar=c(4,4,0,0)+0.1, ps=9)
-plot(a7, toy, chull=chull(toy),
-     xlim=c(0,20), ylim=c(0,20), cex=0.6)
-plot(a7, toy, adata.show=TRUE,
-     xlim=c(0,20), ylim=c(0,20), cex=0.6)
+par(mfrow = c(1, 2), mar = c(2, 2, 0, 0) + 0.1, ps = 9)
+plot(a7, toy, chull = chull(toy),
+     xlim = c(0, 20), ylim = c(0, 20), cex = 0.6, xlab = "", ylab = "")
+plot(a7, toy, adata.show = TRUE,
+     xlim = c(0, 20), ylim = c(0, 20), cex = 0.6, xlab = "", ylab = "")
 @
-        \caption{Seven archetypes for the \code{toy} data set. The
-	left plot shows the convex hull (black), the archetypes and
-	the approximated convex hull (red).  The right plot shows the
-	approximation of the data points through the archetypes and
-	corresponding $\alpha$ values (green symbols, and grey
-	connection lines).}
+        \caption{Visualization of seven archetypes.}
         \label{fig:toy-a7}
 \end{figure}
 The approximation of the convex hull is now clearly visible.
 
+
+\subsection[Alternative numerical methods]{Alternative numerical
+methods\label{ginv}}
+
 As we mentioned in Section \ref{algorithm}, there are many ways to
 solve linear equation systems. One other possibility is the
 Moore-Penrose pseudoinverse:
 <<echo=FALSE>>=
-file <- 'toy-gas.RData'
+file <- "gas.RData"
 if ( file.exists(file) ) {
-  load(file=file)
+  load(file = file)
 } else {
   set.seed(1986)
-  gas <- stepArchetypes(data=toy, k=1:10, family=archetypesFamily('ginv'),
-                        verbose=FALSE, nrep=4)
-  save(gas, file=file)
+  gas <- stepArchetypes(data = toy, k = 1:10, family = archetypesFamily("ginv"),
+                        verbose = FALSE, nrep = 4)
+  save(gas, file = file)
 }
 @ 
 \begin{Schunk}
 \begin{Sinput}
-> set.seed(1986)
-> gas <- stepArchetypes(data=toy, k=1:10, family=archetypesFamily('ginv'),
-+                       verbose=FALSE, nrep=4)
+R> set.seed(1986)
+R> gas <- stepArchetypes(data = toy, k = 1:10, 
++                        family = archetypesFamily("ginv"),
++                        verbose = FALSE, nrep = 4)
 \end{Sinput}
 \begin{Soutput}
 Loading required package: MASS
@@ -581,13 +602,13 @@
 \end{Schunk}
 We use the \code{ginv()} function from the \pkg{MASS} package to
 calculate the pseudoinverse. The function ignores ill-conditioned
-matrizes and ``just solves the linear equation system'', but the
+matrices and ``just solves the linear equation system'', but the
 \code{archetypes} function throws warnings of ill-conditioned matrices
 if the matrix condition number $\kappa$ is bigger than an upper bound
 (default is \code{maxKappa = 1000}):
 \begin{Schunk}
 \begin{Sinput}
-> warnings()
+R> warnings()
 \end{Sinput}
 \begin{Soutput}
 Warnings:
@@ -614,16 +635,21 @@
 \begin{figure}[H]
 	\centering
 \setkeys{Gin}{width=6in}
-<<fig=TRUE,echo=FALSE,width=6,height=2>>=
-par(mfrow=c(1,4), mar=c(4,4,0,0)+0.1, ps=9)
-movieplot(gas[[9]][[3]], toy, xlim=c(0,20), ylim=c(0,20), cex=0.6)
+<<fig=TRUE,echo=FALSE,width=6,height=1.5>>=
+par(mfrow = c(1, 4), mar = c(0, 0, 0, 0) + 0.1, ps = 9)
+movieplot(gas[[9]][[3]], toy, xlim = c(0, 20), ylim = c(0, 20), cex = 0.6,
+          axes = FALSE, postfn = function(iter) {
+            box()
+            text(1, 19, paste(iter + 1, ".", sep = ""), cex = 1)
+          })
 @ 
-	\caption{gas movieplot}
+	\caption{Visualization of algorithm iterations until
+          ``collapsing''.}
 	\label{fig:toy-movieplot-gas}
 \end{figure}
-The figure shows the four steps (from top to bottom and left to
-right), the random initialization and the three iterations until all
-archetypes are in the center of the data.
+The figure shows the four steps (from left to right), the random
+initialization and the three iterations until all archetypes are in
+the center of the data.
 
 All other residual sum of squares are nearly equivalent to the ones
 calculated with $QR$ decomposition. Further investigations would show
@@ -635,8 +661,8 @@
 data:
 <<eval=FALSE>>=
 ga7 <- bestModel(gas[[7]])
-plot(ga7, toy, chull=chull(toy))
-plot(ga7, toy, adata.show=TRUE)
+plot(ga7, toy, chull = chull(toy))
+plot(ga7, toy, adata.show = TRUE)
 @ 
 \begin{figure}[H]
 	\centering
@@ -644,18 +670,13 @@
[TRUNCATED]

To get the complete diff run:
    svnlook diff /svnroot/archetypes -r 17