#' (Dis)similarity based brand maps (MDS) #' #' @details See \url{https://radiant-rstats.github.io/docs/multivariate/mds.html} for an example in Radiant #' #' @param dataset Dataset #' @param id1 A character variable or factor with unique entries #' @param id2 A character variable or factor with unique entries #' @param dis A numeric measure of brand dissimilarity #' @param method Apply metric or non-metric MDS #' @param nr_dim Number of dimensions #' @param seed Random seed #' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000") #' @param envir Environment to extract data from #' #' @return A list of all variables defined in the function as an object of class mds #' #' @examples #' mds(city, "from", "to", "distance") %>% str() #' mds(diamonds, "clarity", "cut", "price") %>% str() #' #' @seealso \code{\link{summary.mds}} to summarize results #' @seealso \code{\link{plot.mds}} to plot results #' #' @importFrom MASS isoMDS #' #' @export mds <- function(dataset, id1, id2, dis, method = "metric", nr_dim = 2, seed = 1234, data_filter = "", envir = parent.frame()) { nr_dim <- as.numeric(nr_dim) df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, c(id1, id2, dis), filt = data_filter, envir = envir, na.rm = FALSE) init_row <- nrow(dataset) dataset <- na.omit(dataset) nr_row <- nrow(dataset) nr_na <- init_row - nr_row if (nr_na > 0) { return(paste0("The map cannot be created because the provided data contains ", nr_na, " rows with\nmissing data. Please choose other ID variables or another dataset.\n\nFor an example dataset go to Data > Manage, select 'examples' from the\n'Load data of type' dropdown, and press the 'Load examples' button. Then\nselect the \'city' dataset.") %>% add_class("mds")) } d <- dataset[[dis]] id1_dat <- as.character(dataset[[id1]]) id2_dat <- as.character(dataset[[id2]]) rm(dataset, envir) ## ids lab <- unique(c(id1_dat, id2_dat)) nrLev <- length(lab) lower_nr <- (nrLev * (nrLev - 1)) / 2 nrObs <- length(d) mds_dis_mat <- matrix(0, nrow = nrLev, ncol = nrLev) %>% set_rownames(lab) %>% set_colnames(lab) if (lower_nr != nrObs && (lower_nr + nrLev) != nrObs) { return("Number of observations and unique IDs for the brand variable do not match.\nPlease choose other ID variables or another dataset.\n\nFor an example dataset go to Data > Manage, select 'examples' from the\n'Load data of type' dropdown, and press the 'Load examples' button. Then\nselect the \'city' dataset." %>% add_class("mds")) } else { for (i in seq_len(nr_row)) { id1 <- id1_dat[i] id2 <- id2_dat[i] mds_dis_mat[id1, id2] <- d[i] mds_dis_mat[id2, id1] <- d[i] } mds_dis_mat <- as.dist(mds_dis_mat) } ## Alternative method, metaMDS - requires vegan # res <- suppressWarnings(metaMDS(mds_dis_mat, k = nr_dim, trymax = 500)) # if (res$converged == FALSE) return("The MDS algorithm did not converge. Please try again.") seed %>% gsub("[^0-9]", "", .) %>% (function(x) if (!is.empty(x)) set.seed(seed)) res <- MASS::isoMDS(mds_dis_mat, k = nr_dim, trace = FALSE) res$stress <- res$stress / 100 if (method == "metric") { res$points <- cmdscale(mds_dis_mat, k = nr_dim) ## Using R^2 # res$stress <- sqrt(1 - cor(dist(res$points),mds_dis_mat)^2) * 100 # Using standard Kruskal formula for metric MDS res$stress <- sqrt((sum((dist(res$points) - mds_dis_mat)^2) / sum(mds_dis_mat^2))) } as.list(environment()) %>% add_class("mds") } #' Summary method for the mds function #' #' @details See \url{https://radiant-rstats.github.io/docs/multivariate/mds.html} for an example in Radiant #' #' @param object Return value from \code{\link{mds}} #' @param dec Rounding to use for output (default = 2). +1 used for stress measure #' @param ... further arguments passed to or from other methods #' #' @examples #' result <- mds(city, "from", "to", "distance") #' summary(result, dec = 1) #' #' @seealso \code{\link{mds}} to calculate results #' @seealso \code{\link{plot.mds}} to plot results #' #' @export summary.mds <- function(object, dec = 2, ...) { if (is.character(object)) { return(cat(object)) } cat("(Dis)similarity based brand map (MDS)\n") cat("Data :", object$df_name, "\n") if (!is.empty(object$data_filter)) { cat("Filter :", gsub("\\n", "", object$data_filter), "\n") } cat("Variables :", paste0(c(object$id1, object$id2, object$dis), collapse = ", "), "\n") cat("Dimensions :", object$nr_dim, "\n") meth <- if (object$method == "non-metric") "Non-metric" else "Metric" cat("Method :", meth, "\n") cat("Observations:", object$nrObs, "\n") cat("\nOriginal distance data:\n") object$mds_dis_mat %>% round(dec) %>% print() cat("\nRecovered distance data:\n") object$res$points %>% dist() %>% round(dec) %>% print() cat("\nCoordinates:\n") object$res$points %>% round(dec) %>% set_colnames({ paste("Dimension ", 1:ncol(.)) }) %>% print() cat("\nStress:", round(object$res$stress, dec + 1)) } #' Plot method for the mds function #' #' @details See \url{https://radiant-rstats.github.io/docs/multivariate/mds.html} for an example in Radiant #' #' @param x Return value from \code{\link{mds}} #' @param rev_dim Flip the axes in plots #' @param fontsz Font size to use in plots #' @param shiny Did the function call originate inside a shiny app #' @param custom Logical (TRUE, FALSE) to indicate if ggplot object (or list of ggplot objects) should be returned. This option can be used to customize plots (e.g., add a title, change x and y labels, etc.). See examples and \url{https://ggplot2.tidyverse.org/} for options. #' @param ... further arguments passed to or from other methods #' #' @examples #' result <- mds(city, "from", "to", "distance") #' plot(result, fontsz = 7) #' plot(result, rev_dim = 1:2) #' #' @seealso \code{\link{mds}} to calculate results #' @seealso \code{\link{summary.mds}} to plot results #' #' @importFrom ggrepel geom_text_repel #' @importFrom rlang .data #' #' @export plot.mds <- function(x, rev_dim = NULL, fontsz = 5, shiny = FALSE, custom = FALSE, ...) { if (is.character(x)) { return(cat(x)) } ## set extremes for plot lim <- max(abs(x$res$points)) ## set seed for ggrepel label positioning set.seed(x$seed) tbl <- as.data.frame(x$res$points) %>% set_colnames(paste0("dim", seq_len(ncol(.)))) tbl$rnames <- rownames(tbl) ## reverse selected dimensions if (!is.empty(rev_dim)) { rev_dim <- as.integer(rev_dim) tbl[, rev_dim] <- -1 * tbl[, rev_dim] } plot_list <- list() for (i in 1:(x$nr_dim - 1)) { for (j in (i + 1):x$nr_dim) { i_name <- paste0("dim", i) j_name <- paste0("dim", j) plot_list[[paste0("dim", i, "_dim", j)]] <- ggplot(tbl, aes(x = .data[[i_name]], y = .data[[j_name]], color = .data$rnames, label = .data$rnames)) + geom_point() + ggrepel::geom_text_repel(size = fontsz) + theme(legend.position = "none") + coord_cartesian(xlim = c(-lim, lim), ylim = c(-lim, lim)) + geom_vline(xintercept = 0, linewidth = 0.3) + geom_hline(yintercept = 0, linewidth = 0.3) + labs( x = paste("Dimension", i), y = paste("Dimension", j) ) } } if (length(plot_list) > 0) { if (custom) { if (length(plot_list) == 1) plot_list[[1]] else plot_list } else { patchwork::wrap_plots(plot_list, ncol = 1) %>% (function(x) if (isTRUE(shiny)) x else print(x)) } } }