high-dimensional data (focus on dynamical views)

29 Oct 2021

packages

Some of these packages (especially rggobi) may be hard to install, because they depend on system resources.

library(tourr)
library(rggobi)
library(factoextra)
library(tidyverse)
library(ggalt)
library(rgl)

(rggobi has been archived on CRAN: install it with remotes::install_version("rggobi", "2.1.22") or remotes::install_github("ggobi/rggobi"); see the GitHub repository https://github.com/ggobi/rggobi and the ggobi web site http://ggobi.org/)

Buja, Cook, and Swayne (1996)

tasks

focusing: animation/motion

interactive example

## open the `laser` data in an interactive ggobi session
ggobi(laser)

tours

three components:

optional extras (for “guided tour”):

Goal: allow an automated, systematic search through the entire space of projections


## Animate a tour of columns 1-6 of `flea`, drawn as an x-y scatterplot
## with points coloured by `flea$species`.
f <- flea[, 1:6]
animate(
  f,
  tour_path = grand_tour(),
  ## alternative path: tour_path = guided_tour(holes()),
  display = display_xy(col = flea$species)
)

available display methods

## list the visible objects whose names contain "animate_"
apropos("animate_")

##  [1] "animate_andrews"   "animate_density2d" "animate_depth"    
##  [4] "animate_dist"      "animate_faces"     "animate_groupxy"  
##  [7] "animate_image"     "animate_pca"       "animate_pcp"      
## [10] "animate_sage"      "animate_scatmat"   "animate_slice"    
## [13] "animate_stars"     "animate_stereo"    "animate_trails"   
## [16] "animate_xy"

## list the visible objects whose names end in "_tour"
apropos("_tour$")

##  [1] "dependence_tour"     "frozen_guided_tour"  "frozen_tour"        
##  [4] "grand_tour"          "guided_section_tour" "guided_tour"        
##  [7] "little_tour"         "local_tour"          "new_tour"           
## [10] "planned_tour"

PCA adjustment

Examples/data from Holmes and Huber (2019)

wine PCA example

## Restore the example data into the workspace; the trailing comments name
## the object each file is expected to provide.
load("../data/wine.RData")      ## numeric matrix (wine)
load("../data/wineClass.RData") ## metadata (wine.class)

## PCA on standardized columns. The argument is spelled `scale.` in full:
## prcomp() has no `scale` argument, so the short form only works through
## partial argument matching.
winePCAd <- prcomp(wine, scale. = TRUE)

## Biplot with observations drawn as points grouped by wine class, variables
## drawn in violet, and a per-class ellipse at level 0.69.
f0 <- fviz_pca_biplot(winePCAd, geom = "point", habillage = wine.class,
                      col.var = "violet", addEllipses = TRUE,
                      ellipse.level = 0.69)
## Apply the shared cosmetic tweaks to a plot `p`: empty title, fixed
## coordinate ratio, and the "Dark2" Brewer palette on both the colour and
## fill scales. Returns the modified plot.
tmp_plot <- function(p) {
  brewer_name <- "Dark2"
  tweaks <- list(
    ggtitle(""),
    coord_fixed(),
    scale_colour_brewer(palette = brewer_name),
    scale_fill_brewer(palette = brewer_name)
  )
  ## components in a list are added to the plot one after another, in order
  p + tweaks
}

plot

## draw the ellipse biplot with the tmp_plot() tweaks applied
print(tmp_plot(f0))

get augmented data

## Per-observation PCA coordinates as a tibble, with the wine class
## prepended as the first column (`class`).
aug_data <- get_pca_ind(winePCAd)$coord %>%
  as_tibble() %>%
  mutate(class = wine.class, .before = 1)

improved plot

## Same biplot as before but without ellipses ...
f1 <- fviz_pca_biplot(
  winePCAd,
  geom = "point",
  habillage = wine.class,
  col.var = "violet"
)

## ... with each class outlined by geom_encircle() instead, drawn from the
## augmented score table (first two dimensions, semi-transparent fill,
## no extra legend entry).
f2 <- tmp_plot(f1) +
  ggalt::geom_encircle(
    data = aug_data,
    mapping = aes(
      x = Dim.1,
      y = Dim.2,
      group = class,
      colour = class,
      fill = class
    ),
    alpha = 0.2,
    show.legend = FALSE
  )

print(f2)

## Set the aspect ratio of a PCA plot from the component standard deviations.
##   p    : plot to modify
##   pca  : object carrying an `sdev` vector (as returned by prcomp())
##   dims : indices into `pca$sdev` for the two plotted components; the
##          first entry is the denominator, the second the numerator
## Returns `p` with coord_fixed(ratio = sdev[dims][2] / sdev[dims][1]).
## NOTE(review): if the plotted scores are unstandardized, coord_fixed()
## with ratio 1 already gives equal units on both axes -- confirm that the
## extra sdev scaling is intended.
adjust_pcaplot <- function(p, pca, dims=1:2) {
  sd_pair <- pca$sdev[dims]
  aspect <- sd_pair[2] / sd_pair[1]
  p + coord_fixed(ratio = aspect)
}
## Re-scale the encircled biplot using the sdev ratio of the first two PCs.
## f2 already carries the coord_fixed() added by tmp_plot(), so adding a new
## coordinate system produces the replacement message shown below.
f3 <- adjust_pcaplot(f2, winePCAd)

## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

## draw the re-scaled plot
print(f3)

## three colours from the Brewer "Dark2" palette, indexed by wine.class below
cvec <- RColorBrewer::brewer.pal(n = 3, name = "Dark2")

## first three PCA dimensions in an rgl scene, default point type (size 4)
with(
  aug_data,
  plot3d(Dim.1, Dim.2, Dim.3, col = cvec[wine.class], size = 4)
)

## same coordinates and colours again, this time with type = "s" and size 2
with(
  aug_data,
  plot3d(Dim.1, Dim.2, Dim.3, col = cvec[wine.class], size = 2, type = "s")
)

references

Buja, Andreas, Dianne Cook, and Deborah F. Swayne. 1996. “Interactive High-Dimensional Data Visualization.” Journal of Computational and Graphical Statistics 5 (1): 78–99. https://doi.org/10.2307/1390754.

Holmes, Susan, and Wolfgang Huber. 2019. Modern Statistics for Modern Biology. 1 edition. Cambridge: Cambridge University Press.