# Low-Rank Canonical Correlation Analysis (LR-CCA)

#### 2020-06-25

require(lolR)
require(ggplot2)
require(MASS)
n=400
d=30
r=3

Data for this notebook will be n=400 examples of d=30 dimensions.

# LR-CCA

## Stacked Cigar Simulation

We first visualize the first 2 dimensions:

testdat <- lol.sims.cigar(n, d)
X <- testdat$X Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Simulated Data")

Projecting with LR-CCA to 3 dimensions and visualizing the first 2:

result <- lol.project.lrcca(X, Y, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Projected Data using LR-CCA")

Projecting with LDA to K-1=1 dimensions:

liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
geom_density(adjust=1.5, alpha=0.6) +
xlab("x1") +
ylab("Density") +
ggtitle(sprintf("LR-CCA - LDA, L = %.2f", lhat))

## Trunk Simulation

We visualize the first 2 dimensions:

testdat <- lol.sims.rtrunk(n, d)
X <- testdat$X Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Simulated Data")

Projecting with LR-CCA to 3 dimensions and visualizing the first 2:

result <- lol.project.lrcca(X, Y, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Projected Data using LR-CCA")

Projecting with LDA to K-1=1 dimensions:

liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
geom_density(adjust=1.5, alpha=0.6) +
xlab("x1") +
ylab("Density") +
ggtitle(sprintf("LRCCA-LDA, L = %.2f", lhat))

## Rotated Trunk Simulation

We visualize the first 2 dimensions:

testdat <- lol.sims.rtrunk(n, d, rotate=TRUE)
X <- testdat$X Y <- testdat$Y

data <- data.frame(x1=X[,1], x2=X[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Simulated Data")

Projecting with LRCCA to 3 dimensions and visualizing the first 2:

result <- lol.project.lrcca(X, Y, r)

data <- data.frame(x1=result$Xr[,1], x2=result$Xr[,2], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, y=x2, color=y)) +
geom_point() +
xlab("x1") +
ylab("x2") +
ggtitle("Projected Data using LR-CCA")

Projecting with LDA to K-1=1 dimensions:

liney <- MASS::lda(result$Xr, Y) result <- predict(liney, result$Xr)
lhat <- 1 - sum(result$class == Y)/length(Y) data <- data.frame(x1=result$x[,1], y=Y)
data$y <- factor(data$y)
ggplot(data, aes(x=x1, fill=y)) +
geom_density(adjust=1.5, alpha=0.6) +
xlab("x1") +
ylab("Density") +
ggtitle(sprintf("LRCCA-LDA, L = %.2f", lhat))