我想将 iris(鸢尾花)数据集分为两个子集:一个用于执行 PCA 的训练子集(包含物种 "versicolor" 和 "setosa"),以及一个验证子集(包含物种 "virginica")。我想使用 "factoextra" 软件包绘制训练数据的 PCA 图,并将验证子集的预测坐标点添加到该图中。
这是我到目前为止所拥有的:
library(dplyr)
library(tibble)
library(stringr)
library(factoextra)
#' Min-max rescale a numeric vector to the interval [0, 1].
#'
#' @param col Numeric vector to rescale.
#' @param na.rm If `TRUE`, missing values are ignored when computing the
#'   range and stay `NA` in the result. The default `FALSE` keeps the
#'   previous behaviour, where a single `NA` makes the whole result `NA`.
#' @return Numeric vector of the same length as `col`. A constant input has
#'   a zero range, so the division yields `NaN`.
scale_this <- function(col, na.rm = FALSE) {
  my_range <- range(col, na.rm = na.rm)
  (col - my_range[1]) / (my_range[2] - my_range[1])
}
my_data <- iris

# Create annotation -----------------------------------------------------------
# One-column data frame holding the species label of every row.
row_annotation <- dplyr::select(my_data, Species)
row_annotation$Species <- factor(row_annotation$Species)

# Create unique rownames + Scaling --------------------------------------------
# Row names have the form "<row number>_<species>". paste0() is vectorised,
# so the names are built in one call instead of a row-by-row loop.
my_data$unique_case_name <- paste0(
  seq_len(nrow(my_data)), "_", my_data$Species
)

# NOTE(review): the scaling range is computed over all rows, i.e. before the
# training/validation split further down — confirm this is intended.
my_data <- my_data %>%
  dplyr::select(-Species) %>%
  # rescale every measurement column with scale_this()
  dplyr::mutate(dplyr::across(-unique_case_name, scale_this)) %>%
  tibble::column_to_rownames(var = "unique_case_name") %>%
  as.matrix()
# Separating the data ---------------------------------------------------------
# validation: rows whose name contains "virginica"
# training:   every other row (setosa, versicolor)
validation_indices <- str_detect(
  string = rownames(my_data),
  pattern = "virginica"
)
training_indices <- !validation_indices

# Performing the PCA ----------------------------------------------------------
# drop = FALSE keeps the subset a matrix even if only one row is selected.
my_pca_object <- prcomp(
  my_data[training_indices, , drop = FALSE],
  scale. = FALSE,
  center = TRUE,
  retx = TRUE
)

# Predicting the validation data coordinates ----------------------------------
# Project the held-out rows onto the components fitted on the training rows.
predicted_coordinates <- stats::predict(
  my_pca_object,
  newdata = my_data[validation_indices, , drop = FALSE]
)
# Plot the PCA ----------------------------------------------------------------
# Plot the training data on the first two principal components.
# NOTE(review): factoextra::fviz_pca_biplot() should additionally draw the
# variable arrows for the same prcomp object — confirm against package docs.
my_pca_plot <- fviz_pca_ind(
  X = my_pca_object,
  axes = c(1, 2),
  geom.ind = c("point"),
  addEllipses = TRUE
) +
  # NULL removes the title; element_blank() is a theme element, not a label.
  labs(title = NULL)

# Add validation data
# Overlay the predicted validation coordinates as blue star-shaped points.
my_pca_plot_2 <- fviz_add(
  my_pca_plot,
  predicted_coordinates,
  color = "blue",
  geom = c("point"),
  addlabel = FALSE,
  shape = 8,
  pointsize = 2
)
my_pca_plot_2
我的问题: 1. 我想在图中添加变量箭头(即各变量的载荷向量):
我该如何解决这些问题?