Skip to contents

The goal of sparseCorrespondenceAnalysis is to illustrate Correspondence Analysis and its sparsification on a data set of causes of death in the United States in 2019.

Installation

You can install the development version of sparseCorrespondenceAnalysis from GitHub with:

devtools::install_github("vguillemot/sparseCorrespondenceAnalysis")

Running Sparse Correspondence Analysis

First load the package and the “Cause of death” data set.

library(sparseCorrespondenceAnalysis)
#> Le chargement a nécessité le package : PMA
#> Le chargement a nécessité le package : ggplot2
#> Le chargement a nécessité le package : ggrepel
data("death.2019")

Then apply the sCAwithPMD function to the data:

# Run the sparse correspondence analysis on the contingency table,
# keeping two dimensions.
sca.res <- sCAwithPMD(
  # Contingency table
  DATA = death.2019,
  # the number of dimensions
  dimensions = 2L,
  # center the data
  doublecentering = TRUE,
  # Asking for a medium amount of sparsity; the same value is repeated
  # twice for s1 (derived from the row count) and for s2 (derived from
  # the column count).
  s1 = rep(sqrt(nrow(death.2019)) / 2, times = 2L),
  s2 = rep(sqrt(ncol(death.2019)) / 2, times = 2L)
)
# Build the map layers for the row factor scores (sca.res$fi) on
# axes 1 and 2.
sca.fi.map.12 <- createFactorMap(
  X = sca.res$fi,
  axis1 = 1,
  axis2 = 2,
  title = "SCA: row factor scores",
  col.background = NULL,
  col.axes = "#42376B",
  width.axes = 0.5,
  alpha.axes = 0.5,
  alpha.points = 0.5,
  pch = 16,
  text.cex = 4,
  constraints = NULL
)

# Stack the background, points and labels, join the points with a path,
# then apply the shared colour theme and axis labels.
# NOTE(review): `size` in element_rect() is deprecated in favour of
# `linewidth` in recent ggplot2 releases -- confirm the minimum supported
# ggplot2 version before switching.
sca.fi.plot.12 <- sca.fi.map.12$zeMap_background +
  sca.fi.map.12$zeMap_dots +
  sca.fi.map.12$zeMap_text +
  geom_path(color = "darkorchid4") +
  theme(
    axis.title = element_text(color = "#42376B"),
    axis.text = element_text(color = "#42376B"),
    title = element_text(color = "#42376B"),
    panel.border = element_rect(size = 1.5, color = "#42376B", fill = NA)
  ) +
  labs(x = "Dimension 1", y = "Dimension 2")

# Display the row factor map.
sca.fi.plot.12

# Build the map layers for the column factor scores (sca.res$fj) on
# axes 1 and 2. The title names the columns: the original reused the
# "row factor scores" title from the row map, which mislabelled this plot.
sca.fj.map.12 <- createFactorMap(
  X = sca.res$fj,
  col.background = NULL,
  col.axes = "#42376B",
  width.axes = 0.5,
  title = "SCA: column factor scores",
  alpha.axes = 0.5,
  alpha.points = 0.5,
  pch = 16,
  axis1 = 1,
  axis2 = 2,
  constraints = NULL,
  text.cex = 4
)

# Stack the background, points and labels, then apply the same colour
# theme and axis labels as the row map (no connecting path here).
# NOTE(review): `size` in element_rect() is deprecated in favour of
# `linewidth` in recent ggplot2 releases -- confirm the minimum supported
# ggplot2 version before switching.
sca.fj.plot.12 <- sca.fj.map.12$zeMap_background +
  sca.fj.map.12$zeMap_dots +
  sca.fj.map.12$zeMap_text +
  theme(
    axis.title = element_text(color = "#42376B"),
    axis.text = element_text(color = "#42376B"),
    title = element_text(color = "#42376B"),
    panel.border = element_rect(size = 1.5, color = "#42376B", fill = NA)
  ) +
  labs(x = "Dimension 1", y = "Dimension 2")

# Display the column factor map.
sca.fj.plot.12
#> Warning: ggrepel: 8 unlabeled data points (too many overlaps). Consider
#> increasing max.overlaps