#' Create a pivot table #' #' @details Create a pivot-table. See \url{https://radiant-rstats.github.io/docs/data/pivotr.html} for an example in Radiant #' #' @param dataset Dataset to tabulate #' @param cvars Categorical variables #' @param nvar Numerical variable #' @param fun Function to apply to numerical variable #' @param normalize Normalize the table by row total, column totals, or overall total #' @param tabfilt Expression used to filter the table (e.g., "Total > 10000") #' @param tabsort Expression used to sort the table (e.g., "desc(Total)") #' @param tabslice Expression used to filter table (e.g., "1:5") #' @param nr Number of rows to display #' @param data_filter Expression used to filter the dataset before creating the table (e.g., "price > 10000") #' @param arr Expression to arrange (sort) the data on (e.g., "color, desc(price)") #' @param rows Rows to select from the specified dataset #' @param envir Environment to extract data from #' #' @examples #' pivotr(diamonds, cvars = "cut") %>% str() #' pivotr(diamonds, cvars = "cut")$tab #' pivotr(diamonds, cvars = c("cut", "clarity", "color"))$tab #' pivotr(diamonds, cvars = "cut:clarity", nvar = "price")$tab #' pivotr(diamonds, cvars = "cut", nvar = "price")$tab #' pivotr(diamonds, cvars = "cut", normalize = "total")$tab #' #' @export pivotr <- function(dataset, cvars = "", nvar = "None", fun = "mean", normalize = "None", tabfilt = "", tabsort = "", tabslice = "", nr = Inf, data_filter = "", arr = "", rows = NULL, envir = parent.frame()) { vars <- if (nvar == "None") cvars else c(cvars, nvar) fill <- if (nvar == "None") 0L else NA df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, vars, filt = data_filter, arr = arr, rows = rows, na.rm = FALSE, envir = envir) ## in case : was used for cvars cvars <- base::setdiff(colnames(dataset), nvar) if (nvar == "None") { nvar <- "n_obs" } else { fixer <- function(x, fun = as_integer) { if (is.character(x) || is.Date(x)) { x <- rep(NA, length(x)) } else if (is.factor(x)) { x_num <- sshhr(as.integer(as.character(x))) if (length(na.omit(x_num)) == 0) { x <- fun(x) } else { x <- x_num } } x } fixer_first <- function(x) { x <- fixer(x, function(x) as_integer(x == levels(x)[1])) } if (fun %in% c("mean", "sum", "sd", "var", "sd", "se", "me", "cv", "prop", "varprop", "sdprop", "seprop", "meprop", "varpop", "sepop")) { dataset[[nvar]] <- fixer_first(dataset[[nvar]]) } else if (fun %in% c("median", "min", "max", "p01", "p025", "p05", "p10", "p25", "p50", "p75", "p90", "p95", "p975", "p99", "skew", "kurtosi")) { dataset[[nvar]] <- fixer(dataset[[nvar]]) } rm(fixer, fixer_first) if ("logical" %in% class(dataset[[nvar]])) { dataset[[nvar]] %<>% as.integer() } } ## convert categorical variables to factors and deal with empty/missing values dataset <- mutate_at(dataset, .vars = cvars, .funs = empty_level) sel <- function(x, nvar, cvar = c()) { if (nvar == "n_obs") x else select_at(x, .vars = c(nvar, cvar)) } sfun <- function(x, nvar, cvars = "", fun = fun) { if (nvar == "n_obs") { if (is.empty(cvars)) { count(x) %>% dplyr::rename("n_obs" = "n") } else { count(select_at(x, .vars = cvars)) %>% dplyr::rename("n_obs" = "n") } } else { dataset <- mutate_at(x, .vars = nvar, .funs = as.numeric) %>% summarise_at(.vars = nvar, .funs = fun, na.rm = TRUE) colnames(dataset)[ncol(dataset)] <- nvar dataset } } ## main tab tab <- dataset %>% group_by_at(.vars = cvars) %>% sfun(nvar, cvars, fun) ## total total <- dataset %>% sel(nvar) %>% sfun(nvar, fun = fun) ## row and column totals if (length(cvars) == 1) { tab <- bind_rows( mutate_at(ungroup(tab), .vars = cvars, .funs = as.character), bind_cols( data.frame("Total", stringsAsFactors = FALSE) %>% setNames(cvars), total %>% set_colnames(nvar) ) ) } else { col_total <- group_by_at(dataset, .vars = cvars[1]) %>% sel(nvar, cvars[1]) %>% sfun(nvar, cvars[1], fun) %>% ungroup() %>% mutate_at(.vars = cvars[1], .funs = as.character) row_total <- group_by_at(dataset, .vars = cvars[-1]) %>% sfun(nvar, cvars[-1], fun) %>% ungroup() %>% select(ncol(.)) %>% bind_rows(total) %>% set_colnames("Total") ## creating cross tab tab <- spread(tab, !!cvars[1], !!nvar, fill = fill) %>% ungroup() %>% mutate_at(.vars = cvars[-1], .funs = as.character) tab <- bind_rows( tab, bind_cols( t(rep("Total", length(cvars[-1]))) %>% as.data.frame(stringsAsFactors = FALSE) %>% setNames(cvars[-1]), data.frame(t(col_total[[2]]), stringsAsFactors = FALSE) %>% set_colnames(col_total[[1]]) ) ) %>% bind_cols(row_total) rm(col_total, row_total, vars) } ## resetting factor levels ind <- ifelse(length(cvars) > 1, -1, 1) levs <- lapply(select_at(dataset, .vars = cvars[ind]), levels) for (i in cvars[ind]) { tab[[i]] %<>% factor(levels = unique(c(levs[[i]], "Total"))) } ## frequency table for chi-square test tab_freq <- tab isNum <- if (length(cvars) == 1) -1 else -c(1:(length(cvars) - 1)) if (normalize == "total") { tab[, isNum] %<>% (function(x) x / total[[1]]) } else if (normalize == "row") { if (!is.null(tab[["Total"]])) { tab[, isNum] %<>% (function(x) x / x[["Total"]]) } } else if (length(cvars) > 1 && normalize == "column") { tab[, isNum] %<>% apply(2, function(.) . / .[which(tab[, 1] == "Total")]) } nrow_tab <- nrow(tab) - 1 ## ensure we don't have invalid column names ## but skip variable names already being used cn <- colnames(tab) cni <- cn %in% setdiff(cn, c(cvars, nvar)) colnames(tab)[cni] <- fix_names(cn[cni]) ## filtering the table if desired if (!is.empty(tabfilt)) { tab <- tab[-nrow(tab), ] %>% filter_data(tabfilt, drop = FALSE) %>% bind_rows(tab[nrow(tab), ]) %>% droplevels() } ## sorting the table if desired if (!is.empty(tabsort, "")) { tabsort <- gsub(",", ";", tabsort) tab[-nrow(tab), ] %<>% arrange(!!!rlang::parse_exprs(tabsort)) ## order factors as set in the sorted table tc <- if (length(cvars) == 1) cvars else cvars[-1] ## don't change top cv for (i in tc) { tab[[i]] %<>% factor(., levels = unique(.)) } } ## slicing the table if desired if (!is.empty(tabslice)) { tab <- tab %>% slice_data(tabslice) %>% bind_rows(tab[nrow(tab), , drop = FALSE]) %>% droplevels() } tab <- as.data.frame(tab, stringsAsFactors = FALSE) attr(tab, "radiant_nrow") <- nrow_tab if (!isTRUE(is.infinite(nr))) { ind <- if (nr >= nrow(tab)) 1:nrow(tab) else c(1:nr, nrow(tab)) tab <- tab[ind, , drop = FALSE] } rm(isNum, dataset, sfun, sel, i, levs, total, ind, nrow_tab, envir) as.list(environment()) %>% add_class("pivotr") } #' Summary method for pivotr #' #' @details See \url{https://radiant-rstats.github.io/docs/data/pivotr.html} for an example in Radiant #' #' @param object Return value from \code{\link{pivotr}} #' @param perc Display numbers as percentages (TRUE or FALSE) #' @param dec Number of decimals to show #' @param chi2 If TRUE calculate the chi-square statistic for the (pivot) table #' @param shiny Did the function call originate inside a shiny app #' @param ... further arguments passed to or from other methods #' #' @examples #' pivotr(diamonds, cvars = "cut") %>% summary(chi2 = TRUE) #' pivotr(diamonds, cvars = "cut", tabsort = "desc(n_obs)") %>% summary() #' pivotr(diamonds, cvars = "cut", tabfilt = "n_obs > 700") %>% summary() #' pivotr(diamonds, cvars = "cut:clarity", nvar = "price") %>% summary() #' #' @seealso \code{\link{pivotr}} to create the pivot-table using dplyr #' #' @export summary.pivotr <- function(object, perc = FALSE, dec = 3, chi2 = FALSE, shiny = FALSE, ...) { if (!shiny) { cat("Pivot table\n") cat("Data :", object$df_name, "\n") if (!is.empty(object$data_filter)) { cat("Filter :", gsub("\\n", "", object$data_filter), "\n") } if (!is.empty(object$arr)) { cat("Arrange :", gsub("\\n", "", object$arr), "\n") } if (!is.empty(object$rows)) { cat("Slice :", gsub("\\n", "", object$rows), "\n") } if (!is.empty(object$tabfilt)) { cat("Table filter:", object$tabfilt, "\n") } if (!is.empty(object$tabsort[1])) { cat("Table sorted:", paste0(object$tabsort, collapse = ", "), "\n") } if (!is.empty(object$tabslice)) { cat("Table slice :", object$tabslice, "\n") } nr <- attr(object$tab, "radiant_nrow") if (!isTRUE(is.infinite(nr)) && !isTRUE(is.infinite(object$nr)) && object$nr < nr) { cat(paste0("Rows shown : ", object$nr, " (out of ", nr, ")\n")) } cat("Categorical :", object$cvars, "\n") if (object$normalize != "None") { cat("Normalize by:", object$normalize, "\n") } if (object$nvar != "n_obs") { cat("Numeric :", object$nvar, "\n") cat("Function :", object$fun, "\n") } cat("\n") print(format_df(object$tab, dec, perc, mark = ","), row.names = FALSE) cat("\n") } if (chi2) { if (length(object$cvars) < 3) { cst <- object$tab_freq %>% filter(.[[1]] != "Total") %>% select(-which(names(.) %in% c(object$cvars, "Total"))) %>% mutate_all(~ ifelse(is.na(.), 0, .)) %>% { sshhr(chisq.test(., correct = FALSE)) } res <- tidy(cst) if (dec < 4 && res$p.value < .001) { p.value <- "< .001" } else { p.value <- format_nr(res$p.value, dec = dec) } res <- round_df(res, dec) l1 <- paste0("Chi-squared: ", res$statistic, " df(", res$parameter, "), p.value ", p.value, "\n") l2 <- paste0(sprintf("%.1f", 100 * (sum(cst$expected < 5) / length(cst$expected))), "% of cells have expected values below 5\n") if (nrow(object$tab_freq) == nrow(object$tab)) { if (shiny) HTML(paste0("

", l1, "
", l2)) else cat(paste0(l1, l2)) } else { note <- "\nNote: Test conducted on unfiltered table" if (shiny) HTML(paste0("

", l1, "
", l2, "

", note)) else cat(paste0(l1, l2, note)) } } else { cat("The number of categorical variables should be 1 or 2 for Chi-square") } } } #' Make an interactive pivot table #' #' @details See \url{https://radiant-rstats.github.io/docs/data/pivotr.html} for an example in Radiant #' #' @param object Return value from \code{\link{pivotr}} #' @param format Show Color bar ("color_bar"), Heat map ("heat"), or None ("none") #' @param perc Display numbers as percentages (TRUE or FALSE) #' @param dec Number of decimals to show #' @param searchCols Column search and filter #' @param order Column sorting #' @param pageLength Page length #' @param caption Table caption #' @param ... further arguments passed to or from other methods #' #' @examples #' \dontrun{ #' pivotr(diamonds, cvars = "cut") %>% dtab() #' pivotr(diamonds, cvars = c("cut", "clarity")) %>% dtab(format = "color_bar") #' pivotr(diamonds, cvars = c("cut", "clarity"), normalize = "total") %>% #' dtab(format = "color_bar", perc = TRUE) #' } #' #' @seealso \code{\link{pivotr}} to create the pivot table #' @seealso \code{\link{summary.pivotr}} to print the table #' #' @export dtab.pivotr <- function(object, format = "none", perc = FALSE, dec = 3, searchCols = NULL, order = NULL, pageLength = NULL, caption = NULL, ...) { style <- if (exists("bslib_current_version") && "4" %in% bslib_current_version()) "bootstrap4" else "bootstrap" tab <- object$tab cvar <- object$cvars[1] cvars <- object$cvars %>% (function(x) if (length(x) > 1) x[-1] else x) cn <- colnames(tab) %>% (function(x) x[-which(cvars %in% x)]) ## for rounding isDbl <- sapply(tab, is_double) isInt <- sapply(tab, is.integer) dec <- ifelse(is.empty(dec) || dec < 0, 3, round(dec, 0)) ## column names without total cn_nt <- if ("Total" %in% cn) cn[-which(cn == "Total")] else cn tot <- tail(tab, 1)[-(1:length(cvars))] %>% format_df(perc = perc, dec = dec, mark = ",") if (length(cvars) == 1 && cvar == cvars) { sketch <- shiny::withTags(table( thead(tr(lapply(c(cvars, cn), th))), tfoot(tr(lapply(c("Total", tot), th))) )) } else { sketch <- shiny::withTags(table( thead( tr(th(colspan = length(c(cvars, cn)), cvar, class = "dt-center")), tr(lapply(c(cvars, cn), th)) ), tfoot( tr(th(colspan = length(cvars), "Total"), lapply(tot, th)) ) )) } if (!is.empty(caption)) { ## from https://github.com/rstudio/DT/issues/630#issuecomment-461191378 caption <- shiny::tags$caption(style = "caption-side: bottom; text-align: left; font-size:100%;", caption) } ## remove row with column totals ## should perhaps be part of pivotr but convenient for now in tfoot ## and for external calls to pivotr tab <- filter(tab, tab[[1]] != "Total") ## for display options see https://datatables.net/reference/option/dom dom <- if (nrow(tab) < 11) "t" else "ltip" fbox <- if (nrow(tab) > 5e6) "none" else list(position = "top") dt_tab <- DT::datatable( tab, container = sketch, caption = caption, selection = "none", rownames = FALSE, filter = fbox, ## must use fillContainer = FALSE to address ## see https://github.com/rstudio/DT/issues/367 ## https://github.com/rstudio/DT/issues/379 fillContainer = FALSE, style = style, options = list( dom = dom, stateSave = TRUE, ## store state searchCols = searchCols, order = order, columnDefs = list(list(orderSequence = c("desc", "asc"), targets = "_all")), autoWidth = TRUE, processing = FALSE, pageLength = { if (is.null(pageLength)) 10 else pageLength }, lengthMenu = list(c(5, 10, 25, 50, -1), c("5", "10", "25", "50", "All")) ), ## https://github.com/rstudio/DT/issues/146#issuecomment-534319155 callback = DT::JS('$(window).on("unload", function() { table.state.clear(); })') ) %>% DT::formatStyle(., cvars, color = "white", backgroundColor = "grey") %>% (function(x) if ("Total" %in% cn) DT::formatStyle(x, "Total", fontWeight = "bold") else x) ## heat map with red or color_bar if (format == "color_bar") { dt_tab <- DT::formatStyle( dt_tab, cn_nt, background = DT::styleColorBar(range(tab[, cn_nt], na.rm = TRUE), "lightblue"), backgroundSize = "98% 88%", backgroundRepeat = "no-repeat", backgroundPosition = "center" ) } else if (format == "heat") { ## round seems to ensure that 'cuts' are ordered according to DT::stylInterval brks <- quantile(tab[, cn_nt], probs = seq(.05, .95, .05), na.rm = TRUE) %>% round(5) clrs <- seq(255, 40, length.out = length(brks) + 1) %>% round(0) %>% (function(x) paste0("rgb(255,", x, ",", x, ")")) dt_tab <- DT::formatStyle(dt_tab, cn_nt, backgroundColor = DT::styleInterval(brks, clrs)) } if (perc) { ## show percentages dt_tab <- DT::formatPercentage(dt_tab, cn, dec) } else { if (sum(isDbl) > 0) { dt_tab <- DT::formatRound(dt_tab, names(isDbl)[isDbl], dec) } if (sum(isInt) > 0) { dt_tab <- DT::formatRound(dt_tab, names(isInt)[isInt], 0) } } ## see https://github.com/yihui/knitr/issues/1198 dt_tab$dependencies <- c( list(rmarkdown::html_dependency_bootstrap("bootstrap")), dt_tab$dependencies ) dt_tab } #' Plot method for the pivotr function #' #' @details See \url{https://radiant-rstats.github.io/docs/data/pivotr} for an example in Radiant #' #' @param x Return value from \code{\link{pivotr}} #' @param type Plot type to use ("fill" or "dodge" (default)) #' @param perc Use percentage on the y-axis #' @param flip Flip the axes in a plot (FALSE or TRUE) #' @param fillcol Fill color for bar-plot when only one categorical variable has been selected (default is "blue") #' @param opacity Opacity for plot elements (0 to 1) #' @param ... further arguments passed to or from other methods #' #' @examples #' pivotr(diamonds, cvars = "cut") %>% plot() #' pivotr(diamonds, cvars = c("cut", "clarity")) %>% plot() #' pivotr(diamonds, cvars = c("cut", "clarity", "color")) %>% plot() #' #' @seealso \code{\link{pivotr}} to generate summaries #' @seealso \code{\link{summary.pivotr}} to show summaries #' #' @importFrom rlang .data #' #' @export plot.pivotr <- function(x, type = "dodge", perc = FALSE, flip = FALSE, fillcol = "blue", opacity = 0.5, ...) { cvars <- x$cvars nvar <- x$nvar tab <- x$tab %>% (function(x) filter(x, x[[1]] != "Total")) if (flip) { # need reverse order here because of how coord_flip works tab <- lapply(tab, function(x) if (inherits(x, "factor")) factor(x, levels = rev(levels(x))) else x) %>% as_tibble() } if (length(cvars) == 1) { p <- ggplot(na.omit(tab), aes(x = .data[[cvars]], y = .data[[nvar]])) + geom_bar(stat = "identity", position = "dodge", alpha = opacity, fill = fillcol) } else if (length(cvars) == 2) { ctot <- which(colnames(tab) == "Total") if (length(ctot) > 0) tab %<>% select(base::setdiff(colnames(.), "Total")) dots <- paste0("factor(", cvars[1], ", levels = c('", paste0(base::setdiff(colnames(tab), cvars[2]), collapse = "','"), "'))") %>% rlang::parse_exprs(.) %>% set_names(cvars[1]) p <- tab %>% gather(!!cvars[1], !!nvar, !!base::setdiff(colnames(.), cvars[2])) %>% na.omit() %>% mutate(!!!dots) %>% ggplot(aes(x = .data[[cvars[1]]], y = .data[[nvar]], fill = .data[[cvars[2]]])) + geom_bar(stat = "identity", position = type, alpha = opacity) } else if (length(cvars) == 3) { ctot <- which(colnames(tab) == "Total") if (length(ctot) > 0) tab %<>% select(base::setdiff(colnames(.), "Total")) dots <- paste0("factor(", cvars[1], ", levels = c('", paste0(base::setdiff(colnames(tab), cvars[2:3]), collapse = "','"), "'))") %>% rlang::parse_exprs(.) %>% set_names(cvars[1]) p <- tab %>% gather(!!cvars[1], !!nvar, !!base::setdiff(colnames(.), cvars[2:3])) %>% na.omit() %>% mutate(!!!dots) %>% ggplot(aes(x = .data[[cvars[1]]], y = .data[[nvar]], fill = .data[[cvars[2]]])) + geom_bar(stat = "identity", position = type, alpha = opacity) + facet_grid(paste(cvars[3], "~ .")) } else { ## No plot returned if more than 3 grouping variables are selected return(invisible()) } if (flip) p <- p + coord_flip() if (perc) p <- p + scale_y_continuous(labels = scales::percent) if (isTRUE(nvar == "n_obs")) { if (!is.empty(x$normalize, "None")) { p <- p + labs(y = ifelse(perc, "Percentage", "Proportion")) } } else { p <- p + labs(y = paste0(nvar, " (", x$fun, ")")) } sshhr(p) } #' Deprecated: Store method for the pivotr function #' #' @details Return the summarized data. See \url{https://radiant-rstats.github.io/docs/data/pivotr.html} for an example in Radiant #' #' @param dataset Dataset #' @param object Return value from \code{\link{pivotr}} #' @param name Name to assign to the dataset #' @param ... further arguments passed to or from other methods #' #' @seealso \code{\link{pivotr}} to generate summaries #' #' @export store.pivotr <- function(dataset, object, name, ...) { if (missing(name)) { object$tab } else { stop( paste0( "This function is deprecated. Use the code below instead:\n\n", name, " <- ", deparse(substitute(object)), "$tab\nregister(\"", name, ")" ), call. = FALSE ) } }