explore <- function(dataset, vars = "", byvar = "", fun = c("mean", "sd"), top = "fun", tabfilt = "", tabsort = "", tabslice = "", nr = Inf, data_filter = "", arr = "", rows = NULL, envir = parent.frame()) { tvars <- vars if (!is.empty(byvar)) tvars <- unique(c(tvars, byvar)) df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, tvars, filt = data_filter, arr = arr, rows = rows, na.rm = FALSE, envir = envir) rm(tvars) ## in case : was used vars <- base::setdiff(colnames(dataset), byvar) ## converting data as needed for summarization dc <- get_class(dataset) fixer <- function(x, fun = as_integer) { if (is.character(x) || is.Date(x)) { x <- rep(NA, length(x)) } else if (is.factor(x)) { x_num <- sshhr(as.integer(as.character(x))) if (length(na.omit(x_num)) == 0) { x <- fun(x) } else { x <- x_num } } x } fixer_first <- function(x) { x <- fixer(x, function(x) as_integer(x == levels(x)[1])) } mean <- function(x, na.rm = TRUE) sshhr(base::mean(fixer_first(x), na.rm = na.rm)) sum <- function(x, na.rm = TRUE) sshhr(base::sum(fixer_first(x), na.rm = na.rm)) var <- function(x, na.rm = TRUE) sshhr(stats::var(fixer_first(x), na.rm = na.rm)) sd <- function(x, na.rm = TRUE) sshhr(stats::sd(fixer_first(x), na.rm = na.rm)) se <- function(x, na.rm = TRUE) sshhr(radiant.data::se(fixer_first(x), na.rm = na.rm)) me <- function(x, na.rm = TRUE) sshhr(radiant.data::me(fixer_first(x), na.rm = na.rm)) cv <- function(x, na.rm = TRUE) sshhr(radiant.data::cv(fixer_first(x), na.rm = na.rm)) prop <- function(x, na.rm = TRUE) sshhr(radiant.data::prop(fixer_first(x), na.rm = na.rm)) varprop <- function(x, na.rm = TRUE) sshhr(radiant.data::varprop(fixer_first(x), na.rm = na.rm)) sdprop <- function(x, na.rm = TRUE) sshhr(radiant.data::sdprop(fixer_first(x), na.rm = na.rm)) seprop <- function(x, na.rm = TRUE) sshhr(radiant.data::seprop(fixer_first(x), na.rm = na.rm)) meprop <- function(x, na.rm = TRUE) sshhr(radiant.data::meprop(fixer_first(x), na.rm = na.rm)) varpop <- function(x, na.rm = TRUE) sshhr(radiant.data::varpop(fixer_first(x), na.rm = na.rm)) sdpop <- function(x, na.rm = TRUE) sshhr(radiant.data::sdpop(fixer_first(x), na.rm = na.rm)) median <- function(x, na.rm = TRUE) sshhr(stats::median(fixer(x), na.rm = na.rm)) min <- function(x, na.rm = TRUE) sshhr(base::min(fixer(x), na.rm = na.rm)) max <- function(x, na.rm = TRUE) sshhr(base::max(fixer(x), na.rm = na.rm)) p01 <- function(x, na.rm = TRUE) sshhr(radiant.data::p01(fixer(x), na.rm = na.rm)) p025 <- function(x, na.rm = TRUE) sshhr(radiant.data::p025(fixer(x), na.rm = na.rm)) p05 <- function(x, na.rm = TRUE) sshhr(radiant.data::p05(fixer(x), na.rm = na.rm)) p10 <- function(x, na.rm = TRUE) sshhr(radiant.data::p10(fixer(x), na.rm = na.rm)) p25 <- function(x, na.rm = TRUE) sshhr(radiant.data::p25(fixer(x), na.rm = na.rm)) p75 <- function(x, na.rm = TRUE) sshhr(radiant.data::p75(fixer(x), na.rm = na.rm)) p90 <- function(x, na.rm = TRUE) sshhr(radiant.data::p90(fixer(x), na.rm = na.rm)) p95 <- function(x, na.rm = TRUE) sshhr(radiant.data::p95(fixer(x), na.rm = na.rm)) p975 <- function(x, na.rm = TRUE) sshhr(radiant.data::p975(fixer(x), na.rm = na.rm)) p99 <- function(x, na.rm = TRUE) sshhr(radiant.data::p99(fixer(x), na.rm = na.rm)) skew <- function(x, na.rm = TRUE) sshhr(radiant.data::skew(fixer(x), na.rm = na.rm)) kurtosi <- function(x, na.rm = TRUE) sshhr(radiant.data::kurtosi(fixer(x), na.rm = na.rm)) isLogNum <- "logical" == dc & names(dc) %in% base::setdiff(vars, byvar) if (sum(isLogNum) > 0) { dataset[, isLogNum] <- select(dataset, which(isLogNum)) %>% mutate_all(as.integer) dc[isLogNum] <- "integer" } if (is.empty(byvar)) { byvar <- c() tab <- summarise_all(dataset, fun, na.rm = TRUE) } else { ## convert categorical variables to factors if needed ## needed to deal with empty/missing values dataset[, byvar] <- select_at(dataset, .vars = byvar) %>% mutate_all(~ empty_level(.)) tab <- dataset %>% group_by_at(.vars = byvar) %>% summarise_all(fun, na.rm = TRUE) } ## adjust column names if (length(vars) == 1 || length(fun) == 1) { rng <- (length(byvar) + 1):ncol(tab) colnames(tab)[rng] <- paste0(vars, "_", fun) rm(rng) } ## setup regular expression to split variable/function column appropriately rex <- paste0("(.*?)_", glue('({glue_collapse(fun, "$|")}$)')) ## useful answer and comments: http://stackoverflow.com/a/27880388/1974918 tab <- gather(tab, "variable", "value", !!-(seq_along(byvar))) %>% extract(variable, into = c("variable", "fun"), regex = rex) %>% mutate(fun = factor(fun, levels = !!fun), variable = factor(variable, levels = vars)) %>% # mutate(variable = paste0(variable, " {", dc[variable], "}")) %>% spread("fun", "value") ## flip the table if needed if (top != "fun") { tab <- list(tab = tab, byvar = byvar, fun = fun) %>% flip(top) } nrow_tab <- nrow(tab) ## filtering the table if desired from Report > Rmd if (!is.empty(tabfilt)) { tab <- filter_data(tab, tabfilt) } ## sorting the table if desired from Report > Rmd if (!identical(tabsort, "")) { tabsort <- gsub(",", ";", tabsort) tab <- tab %>% arrange(!!!rlang::parse_exprs(tabsort)) } ## ensure factors ordered as in the (sorted) table if (!is.empty(byvar) && top != "byvar") { for (i in byvar) tab[[i]] <- tab[[i]] %>% (function(x) factor(x, levels = unique(x))) rm(i) } ## frequencies converted to doubles during gather/spread above check_int <- function(x) { if (is.double(x) && length(na.omit(x)) > 0) { x_int <- sshhr(as.integer(round(x, .Machine$double.rounding))) if (isTRUE(all.equal(x, x_int, check.attributes = FALSE))) x_int else x } else { x } } tab <- ungroup(tab) %>% mutate_all(check_int) ## slicing the table if desired if (!is.empty(tabslice)) { tab <- tab %>% slice_data(tabslice) %>% droplevels() } ## convert to data.frame to maintain attributes tab <- as.data.frame(tab, stringsAsFactors = FALSE) attr(tab, "radiant_nrow") <- nrow_tab if (!isTRUE(is.infinite(nr))) { ind <- if (nr > nrow(tab)) 1:nrow(tab) else 1:nr tab <- tab[ind, , drop = FALSE] rm(ind) } list( tab = tab, df_name = df_name, vars = vars, byvar = byvar, fun = fun, top = top, tabfilt = tabfilt, tabsort = tabsort, tabslice = tabslice, nr = nr, data_filter = data_filter, arr = arr, rows = rows ) %>% add_class("explore") } summary.explore <- function(object, dec = 3, ...) { cat("Explore\n") cat("Data :", object$df_name, "\n") if (!is.empty(object$data_filter)) { cat("Filter :", gsub("\\n", "", object$data_filter), "\n") } if (!is.empty(object$arr)) { cat("Arrange :", gsub("\\n", "", object$arr), "\n") } if (!is.empty(object$rows)) { cat("Slice :", gsub("\\n", "", object$rows), "\n") } if (!is.empty(object$tabfilt)) { cat("Table filter:", object$tabfilt, "\n") } if (!is.empty(object$tabsort[1])) { cat("Table sorted:", paste0(object$tabsort, collapse = ", "), "\n") } if (!is.empty(object$tabslice)) { cat("Table slice :", object$tabslice, "\n") } nr <- attr(object$tab, "radiant_nrow") if (!isTRUE(is.infinite(nr)) && !isTRUE(is.infinite(object$nr)) && object$nr < nr) { cat(paste0("Rows shown : ", object$nr, " (out of ", nr, ")\n")) } if (!is.empty(object$byvar[1])) { cat("Grouped by :", object$byvar, "\n") } cat("Functions :", paste0(object$fun, collapse = ", "), "\n") cat("Top :", c("fun" = "Function", "var" = "Variables", "byvar" = "Group by")[object$top], "\n") cat("\n") format_df(object$tab, dec = dec, mark = ",") %>% print(row.names = FALSE) invisible() } store.explore <- function(dataset, object, name, ...) { if (missing(name)) { object$tab } else { stop( paste0( "This function is deprecated. Use the code below instead:\n\n", name, " <- ", deparse(substitute(object)), "$tab\nregister(\"", name, ")" ), call. = FALSE ) } } flip <- function(expl, top = "fun") { cvars <- expl$byvar %>% (function(x) if (is.empty(x[1])) character(0) else x) if (top[1] == "var") { expl$tab %<>% gather(".function", "value", !!-(1:(length(cvars) + 1))) %>% spread("variable", "value") expl$tab[[".function"]] %<>% factor(., levels = expl$fun) } else if (top[1] == "byvar" && length(cvars) > 0) { expl$tab %<>% gather(".function", "value", !!-(1:(length(cvars) + 1))) %>% spread(!!cvars[1], "value") expl$tab[[".function"]] %<>% factor(., levels = expl$fun) ## ensure we don't have invalid column names colnames(expl$tab) <- fix_names(colnames(expl$tab)) } expl$tab } dtab.explore <- function(object, dec = 3, searchCols = NULL, order = NULL, pageLength = NULL, caption = NULL, ...) { style <- if (exists("bslib_current_version") && "4" %in% bslib_current_version()) "bootstrap4" else "bootstrap" tab <- object$tab cn_all <- colnames(tab) cn_num <- cn_all[sapply(tab, is.numeric)] cn_cat <- cn_all[-which(cn_all %in% cn_num)] isInt <- sapply(tab, is.integer) isDbl <- sapply(tab, is_double) dec <- ifelse(is.empty(dec) || dec < 0, 3, round(dec, 0)) top <- c("fun" = "Function", "var" = "Variables", "byvar" = paste0("Group by: ", object$byvar[1]))[object$top] sketch <- shiny::withTags( table( thead( tr( th(" ", colspan = length(cn_cat)), lapply(top, th, colspan = length(cn_num), class = "text-center") ), tr(lapply(cn_all, th)) ) ) ) if (!is.empty(caption)) { ## from https://github.com/rstudio/DT/issues/630#issuecomment-461191378 caption <- shiny::tags$caption(style = "caption-side: bottom; text-align: left; font-size:100%;", caption) } ## for display options see https://datatables.net/reference/option/dom dom <- if (nrow(tab) < 11) "t" else "ltip" fbox <- if (nrow(tab) > 5e6) "none" else list(position = "top") dt_tab <- DT::datatable( tab, container = sketch, caption = caption, selection = "none", rownames = FALSE, filter = fbox, ## must use fillContainer = FALSE to address ## see https://github.com/rstudio/DT/issues/367 ## https://github.com/rstudio/DT/issues/379 fillContainer = FALSE, style = style, options = list( dom = dom, stateSave = TRUE, ## store state searchCols = searchCols, order = order, columnDefs = list(list(orderSequence = c("desc", "asc"), targets = "_all")), autoWidth = TRUE, processing = FALSE, pageLength = { if (is.null(pageLength)) 10 else pageLength }, lengthMenu = list(c(5, 10, 25, 50, -1), c("5", "10", "25", "50", "All")) ), ## https://github.com/rstudio/DT/issues/146#issuecomment-534319155 callback = DT::JS('$(window).on("unload", function() { table.state.clear(); })') ) %>% DT::formatStyle(., cn_cat, color = "white", backgroundColor = "grey") ## rounding as needed if (sum(isDbl) > 0) { dt_tab <- DT::formatRound(dt_tab, names(isDbl)[isDbl], dec) } if (sum(isInt) > 0) { dt_tab <- DT::formatRound(dt_tab, names(isInt)[isInt], 0) } ## see https://github.com/yihui/knitr/issues/1198 dt_tab$dependencies <- c( list(rmarkdown::html_dependency_bootstrap("bootstrap")), dt_tab$dependencies ) dt_tab } ########################################### ## turn functions below into functional ... ########################################### n_obs <- function(x, ...) length(x) n_missing <- function(x, ...) sum(is.na(x)) p01 <- function(x, na.rm = TRUE) quantile(x, .01, na.rm = na.rm) p025 <- function(x, na.rm = TRUE) quantile(x, .025, na.rm = na.rm) p05 <- function(x, na.rm = TRUE) quantile(x, .05, na.rm = na.rm) p10 <- function(x, na.rm = TRUE) quantile(x, .1, na.rm = na.rm) p25 <- function(x, na.rm = TRUE) quantile(x, .25, na.rm = na.rm) p75 <- function(x, na.rm = TRUE) quantile(x, .75, na.rm = na.rm) p90 <- function(x, na.rm = TRUE) quantile(x, .90, na.rm = na.rm) p95 <- function(x, na.rm = TRUE) quantile(x, .95, na.rm = na.rm) p975 <- function(x, na.rm = TRUE) quantile(x, .975, na.rm = na.rm) p99 <- function(x, na.rm = TRUE) quantile(x, .99, na.rm = na.rm) cv <- function(x, na.rm = TRUE) { m <- mean(x, na.rm = na.rm) if (m == 0) { message("Mean should be greater than 0") NA } else { sd(x, na.rm = na.rm) / m } } se <- function(x, na.rm = TRUE) { if (na.rm) x <- na.omit(x) sd(x) / sqrt(length(x)) } me <- function(x, conf_lev = 0.95, na.rm = TRUE) { if (na.rm) x <- na.omit(x) se(x) * qt(conf_lev / 2 + .5, length(x) - 1, lower.tail = TRUE) } prop <- function(x, na.rm = TRUE) { if (na.rm) x <- na.omit(x) if (is.numeric(x)) { mean(x == max(x, 1)) ## gives proportion of max value in x } else if (is.factor(x)) { mean(x == levels(x)[1]) ## gives proportion of first level in x } else if (is.logical(x)) { mean(x) } else { NA } } varprop <- function(x, na.rm = TRUE) { p <- prop(x, na.rm = na.rm) p * (1 - p) } sdprop <- function(x, na.rm = TRUE) sqrt(varprop(x, na.rm = na.rm)) seprop <- function(x, na.rm = TRUE) { if (na.rm) x <- na.omit(x) sqrt(varprop(x, na.rm = FALSE) / length(x)) } meprop <- function(x, conf_lev = 0.95, na.rm = TRUE) { if (na.rm) x <- na.omit(x) seprop(x) * qnorm(conf_lev / 2 + .5, lower.tail = TRUE) } varpop <- function(x, na.rm = TRUE) { if (na.rm) x <- na.omit(x) n <- length(x) var(x) * ((n - 1) / n) } sdpop <- function(x, na.rm = TRUE) sqrt(varpop(x, na.rm = na.rm)) ln <- function(x, na.rm = TRUE) { if (na.rm) log(na.omit(x)) else log(x) } does_vary <- function(x, na.rm = TRUE) { ## based on http://stackoverflow.com/questions/4752275/test-for-equality-among-all-elements-of-a-single-vector if (length(x) == 1L) { FALSE } else { if (is.factor(x) || is.character(x)) { length(unique(x)) > 1 } else { abs(max(x, na.rm = na.rm) - min(x, na.rm = na.rm)) > .Machine$double.eps^0.5 } } } empty_level <- function(x) { if (!is.factor(x)) x <- as.factor(x) levs <- levels(x) if ("" %in% levs) { levs[levs == ""] <- "NA" x <- factor(x, levels = levs) x[is.na(x)] <- "NA" } else if (any(is.na(x))) { x <- factor(x, levels = unique(c(levs, "NA"))) x[is.na(x)] <- "NA" } x } modal <- function(x, na.rm = TRUE) { if (na.rm) x <- na.omit(x) unv <- unique(x) unv[which.max(tabulate(match(x, unv)))] } #—————————————————————————————————————————————————绘图部分——————————————————————————————————————————— visualize <- function(dataset, xvar, yvar = "", comby = FALSE, combx = FALSE, type = ifelse(is.empty(yvar), "dist", "scatter"), nrobs = -1, facet_row = ".", facet_col = ".", color = "none", fill = "none", size = "none", fillcol = "blue", linecol = "black", pointcol = "black", bins = 10, smooth = 1, fun = "mean", check = "", axes = "", alpha = 0.5, theme = "theme_gray", base_size = 11, base_family = "", labs = list(), xlim = NULL, ylim = NULL, data_filter = "", arr = "", rows = NULL, shiny = FALSE, custom = FALSE, envir = parent.frame()) { if (missing(xvar) && type %in% c("box", "line")) { xvar <- yvar if (type == "box") { type <- "box-single" if (comby) { comby <- FALSE combx <- TRUE } } else { type <- "line-single" } } ## inspired by Joe Cheng's ggplot2 browser app http://www.youtube.com/watch?feature=player_embedded&v=o2B5yJeEl1A#! vars <- xvar if (!type %in% c("scatter", "line", "line-single", "box")) color <- "none" if (!type %in% c("bar", "dist", "density", "surface")) fill <- "none" if (type != "scatter") { check %<>% sub("line", "", .) %>% sub("loess", "", .) if (length(fun) > 1) { fun <- fun[1] message("No more than one function (", fun, ") will be used for plots of type ", type) } size <- "none" } if (type == "scatter" && length(fun) > 3) { fun <- fun[1:3] message("No more than three functions (", paste(fun, collapse = ", "), ") can be used with scatter plots") } if (!type %in% c("scatter", "box", "box-single")) check %<>% sub("jitter", "", .) ## variable to use if bar chart is specified byvar <- NULL if (length(yvar) == 0 || identical(yvar, "")) { if (!type %in% c("dist", "density")) { return("No Y-variable provided for a plot that requires one") } } else if (type == "surface" && is.empty(fill, "none")) { return("No Fill variable provided for a plot that requires one") } else { if (type %in% c("dist", "density")) { yvar <- "" } else { vars %<>% c(., yvar) } } if (color != "none") { vars %<>% c(., color) if (type == "line") byvar <- color } if (facet_row != ".") { vars %<>% c(., facet_row) byvar <- if (is.null(byvar)) facet_row else unique(c(byvar, facet_row)) } if (facet_col != ".") { vars %<>% c(., facet_col) byvar <- if (is.null(byvar)) facet_col else unique(c(byvar, facet_col)) } if (facet_col != "." && facet_row == facet_col) { return("The same variable cannot be used for both Facet row and Facet column") } if (fill != "none") { vars %<>% c(., fill) if (type == "bar") { byvar <- if (is.null(byvar)) fill else unique(c(byvar, fill)) } } if (size != "none") vars %<>% c(., size) ## so you can also pass-in a data.frame df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, vars, filt = data_filter, arr = arr, rows = rows, envir = envir) if (type == "scatter" && !is.empty(nrobs)) { nrobs <- as.integer(nrobs) if (nrobs > 0 && nrobs < nrow(dataset)) { dataset <- sample_n(dataset, nrobs, replace = FALSE) } } ## get class dc <- dc_org <- get_class(dataset) ## if : is used to specify a range of variables if (length(vars) < ncol(dataset)) { fl <- strsplit(xvar, ":") %>% unlist() cn <- colnames(dataset) xvar <- cn[which(fl[1] == cn):which(fl[2] == cn)] } ## converting character variables if needed isChar <- dc == "character" if (sum(isChar) > 0) { if (type == "density") { dataset[, isChar] <- select(dataset, which(isChar)) %>% mutate_all(as_numeric) if ("character" %in% get_class(select(dataset, which(isChar)))) { return("Character variable(s) were not converted to numeric for plotting.\nTo use these variables in a plot convert them to numeric\nvariables (or factors) in the Data > Transform tab") } } else { dataset[, isChar] <- select(dataset, which(isChar)) %>% mutate_all(as_factor) nrlev <- sapply(dataset, function(x) if (is.factor(x)) length(levels(x)) else 0) if (max(nrlev) > 500) { return("Character variable(s) were not converted to factors for plotting.\nTo use these variable in a plot convert them to factors\n(or numeric variables) in the Data > Transform tab") } } ## in case something was changed, if not, this won't run dc <- get_class(dataset) } if (type %in% c("bar", "line")) { if (any(xvar %in% yvar)) { return("Cannot create a bar or line chart if an X-variable is also included as a Y-variable") } } else if (type == "box") { if (any(xvar %in% yvar)) { return("Cannot create a box-plot if an X-variable is also included as a Y-variable") } } ## Determine if you want to use the first level of factor or not if (type %in% c("bar", "line")) { isFctY <- "factor" == dc & names(dc) %in% yvar if (sum(isFctY)) { levs_org <- sapply(dataset[, isFctY, drop = FALSE], function(x) levels(x)[1]) levs <- c() fixer_first <- function(x) { x_num <- sshhr(as.integer(as.character(x))) if (length(na.omit(x_num)) == 0) { lx <- levels(x) x <- as_integer(x == lx[1]) levs <<- c(levs, lx[1]) } else { x <- x_num levs <<- c(levs, NA) } x } fixer <- function(x) { x_num <- sshhr(as.integer(as.character(x))) if (length(na.omit(x_num)) == 0) { lx <- levels(x) x <- as_integer(x) levs <<- c(levs, lx[1]) } else { x <- x_num levs <<- c(levs, NA) } x } if (fun %in% c("mean", "sum", "sd", "var", "sd", "se", "me", "cv", "prop", "varprop", "sdprop", "seprop", "meprop", "varpop", "sepop")) { mfun <- fixer_first } else if (fun %in% c("median", "min", "max", "p01", "p025", "p05", "p10", "p25", "p50", "p75", "p90", "p95", "p975", "p99", "skew", "kurtosi")) { mfun <- fixer } else { mfun <- function(x) { levs <<- c(levs, NA) x } } dataset[, isFctY] <- select(dataset, which(isFctY)) %>% mutate_all(mfun) names(levs) <- names(levs_org) dc[isFctY] <- "integer" } } if (xor("log_x" %in% axes, "log_y" %in% axes)) { if (any(xvar %in% yvar)) { return("When applying 'Log X' an X-variable cannot also be selected as a Y-variable") } if (any(yvar %in% xvar)) { return("When applying 'Log Y' a Y-variable cannot also be selected as an X-variable") } } log_trans <- function(x) ifelse(x > 0, log(x), NA) if ("log_x" %in% axes) { if (any(!dc[xvar] %in% c("integer", "numeric"))) { return("'Log X' is only meaningful for X-variables of type integer or numeric") } to_log <- (dc[xvar] %in% c("integer", "numeric")) %>% xvar[.] dataset <- mutate_at(dataset, .vars = to_log, .funs = log_trans) } if ("log_y" %in% axes) { if (any(!dc[yvar] %in% c("integer", "numeric"))) { return("'Log Y' is only meaningful for Y-variables of type integer or numeric") } to_log <- (dc[yvar] %in% c("integer", "numeric")) %>% yvar[.] dataset <- mutate_at(dataset, .vars = to_log, .funs = log_trans) } ## combining Y-variables if needed if (comby && length(yvar) > 1) { if (any(xvar %in% yvar) && !type %in% c("box-single", "line-single")) { return("X-variables cannot be part of Y-variables when combining Y-variables") } if (!is.empty(color, "none")) { return("Cannot use Color when combining Y-variables") } if (!is.empty(fill, "none")) { return("Cannot use Fill when combining Y-variables") } if (!is.empty(size, "none")) { return("Cannot use Size when combining Y-variables") } if (facet_row %in% yvar || facet_col %in% yvar) { return("Facet row or column variables cannot be part of\nY-variables when combining Y-variables") } dataset <- gather(dataset, "yvar", "values", !!yvar, factor_key = TRUE) yvar <- "values" byvar <- if (is.null(byvar)) "yvar" else c("yvar", byvar) color <- fill <- "yvar" dc <- get_class(dataset) } ## combining X-variables if needed if (combx && length(xvar) > 1) { if (!is.empty(fill, "none")) { return("Cannot use Fill when combining X-variables") } if (facet_row %in% xvar || facet_col %in% xvar) { return("Facet row or column variables cannot be part of\nX-variables when combining Y-variables") } if (any(!get_class(select_at(dataset, .vars = xvar)) %in% c("numeric", "integer"))) { return("Cannot combine plots for non-numeric variables") } dataset <- gather(dataset, "xvar", "values", !!xvar, factor_key = TRUE) xvar <- "values" byvar <- if (is.null(byvar)) "xvar" else c("xvar", byvar) color <- fill <- "xvar" dc <- get_class(dataset) } plot_list <- list() if (type == "dist") { for (i in xvar) { ## can't create a distribution plot for a logical if (dc[i] == "logical") { dataset[[i]] <- as_factor(dataset[[i]]) dc[i] <- "factor" } hist_par <- list(alpha = alpha, position = "stack") if (combx) hist_par[["position"]] <- "identity" if (fill == "none") hist_par[["fill"]] <- fillcol plot_list[[i]] <- ggplot(dataset, aes(x = .data[[i]])) if ("density" %in% axes && !"factor" %in% dc[i]) { hist_par <- c(list(aes(y = after_stat(density))), hist_par) plot_list[[i]] <- plot_list[[i]] + geom_density(adjust = smooth, color = linecol, linewidth = .5) } if ("factor" %in% dc[i]) { plot_fun <- get("geom_bar") if ("log_x" %in% axes) axes <- sub("log_x", "", axes) } else { plot_fun <- get("geom_histogram") hist_par[["binwidth"]] <- select_at(dataset, .vars = i) %>% range() %>% { diff(.) / (bins - 1) } } plot_list[[i]] <- plot_list[[i]] + do.call(plot_fun, hist_par) if ("log_x" %in% axes) plot_list[[i]] <- plot_list[[i]] + xlab(paste("log", i)) } } else if (type == "density") { for (i in xvar) { plot_list[[i]] <- ggplot(dataset, aes(x = .data[[i]])) + if (fill == "none") { geom_density(adjust = smooth, color = linecol, fill = fillcol, alpha = alpha, linewidth = 1) } else { geom_density(adjust = smooth, alpha = alpha, linewidth = 1) } if ("log_x" %in% axes) plot_list[[i]] <- plot_list[[i]] + xlab(paste("log", i)) } } else if (type == "scatter") { itt <- 1 if ("jitter" %in% check) { if (color == "none") { gs <- geom_jitter(alpha = alpha, color = pointcol, position = position_jitter(width = 0.4, height = 0.0)) } else { gs <- geom_jitter(alpha = alpha, position = position_jitter(width = 0.4, height = 0.0)) } check <- sub("jitter", "", check) } else { if (color == "none") { gs <- geom_point(alpha = alpha, color = pointcol) } else { gs <- geom_point(alpha = alpha) } } for (i in xvar) { if ("log_x" %in% axes && dc[i] == "factor") axes <- sub("log_x", "", axes) for (j in yvar) { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]])) + gs if ("log_x" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + xlab(paste("log", i)) if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", j)) if (dc[i] == "factor") { ## make range comparable to bar plot ymax <- max(0, max(dataset[[j]])) ymin <- min(0, min(dataset[[j]])) plot_list[[itt]] <- plot_list[[itt]] + ylim(ymin, ymax) fun1 <- function(y) { y <- get(fun[1])(y) data.frame(ymin = y, ymax = y, y = y, stringsAsFactors = FALSE) } if (length(fun) == 1) { ## need some contrast in this case if (pointcol[1] == "black" && linecol[1] == "black") { linecol[1] <- "blue" } plot_list[[itt]] <- plot_list[[itt]] + stat_summary( fun.data = fun1, na.rm = TRUE, aes(fill = fun[1]), geom = "crossbar", show.legend = FALSE, color = linecol[1] ) } else { plot_list[[itt]] <- plot_list[[itt]] + stat_summary( fun.data = fun1, na.rm = TRUE, aes(fill = fun[1]), geom = "crossbar", show.legend = TRUE, color = linecol[1] ) if (length(fun) > 1) { fun2 <- function(y) { y <- get(fun[2])(y) data.frame(ymin = y, ymax = y, y = y, stringsAsFactors = FALSE) } if (length(linecol) == 1) linecol <- c(linecol, "blue") plot_list[[itt]] <- plot_list[[itt]] + stat_summary( fun.data = fun2, na.rm = TRUE, aes(fill = fun[2]), geom = "crossbar", show.legend = FALSE, color = linecol[2] ) } if (length(fun) == 3) { fun3 <- function(y) { y <- get(fun[3])(y) data.frame(ymin = y, ymax = y, y = y, stringsAsFactors = FALSE) } if (length(linecol) == 2) linecol <- c(linecol, "red") plot_list[[itt]] <- plot_list[[itt]] + stat_summary( fun.data = fun3, na.rm = TRUE, aes(fill = fun[3]), geom = "crossbar", show.legend = FALSE, color = linecol[3] ) } ## adding a legend if needed plot_list[[itt]] <- plot_list[[itt]] + scale_fill_manual(name = "", values = linecol, labels = fun) + ## next line based on https://stackoverflow.com/a/25294787/1974918 guides(fill = guide_legend(override.aes = list(color = NULL))) } nr <- nrow(dataset) if (nr > 1000 || nr != length(unique(dataset[[i]]))) { plot_list[[itt]]$labels$y %<>% paste0(., " (", paste(fun, collapse = ", "), ")") } } itt <- itt + 1 } } } else if (type == "surface") { itt <- 1 for (i in xvar) { if ("log_x" %in% axes && dc[i] == "factor") axes <- sub("log_x", "", axes) interpolate <- ifelse("interpolate" %in% check, TRUE, FALSE) for (j in yvar) { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]], fill = .data[[fill]])) + geom_raster(interpolate = interpolate) if ("log_x" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + xlab(paste("log", i)) if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", j)) itt <- itt + 1 } } } else if (type == "line") { itt <- 1 for (i in xvar) { for (j in yvar) { flab <- "" if (color == "none") { if (dc[i] %in% c("factor", "date") || dc_org[j] == "factor") { tbv <- if (is.null(byvar)) i else c(i, byvar) tmp <- dataset %>% group_by_at(.vars = tbv) %>% select_at(.vars = c(tbv, j)) %>% na.omit() %>% summarise_all(fun) colnames(tmp)[ncol(tmp)] <- j plot_list[[itt]] <- ggplot(tmp, aes(x = .data[[i]], y = .data[[j]])) + geom_line(aes(group = 1), color = linecol) if (nrow(tmp) < 101) plot_list[[itt]] <- plot_list[[itt]] + geom_point(color = pointcol) } else { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]])) + geom_line(color = linecol) } } else { if (dc[i] %in% c("factor", "date") || (!is.empty(dc_org[j]) && dc_org[j] == "factor")) { tbv <- if (is.null(byvar)) i else unique(c(i, byvar)) tmp <- dataset %>% group_by_at(.vars = tbv) %>% select_at(.vars = c(tbv, color, j)) %>% na.omit() %>% summarise_all(fun) colnames(tmp)[ncol(tmp)] <- j plot_list[[itt]] <- ggplot(tmp, aes(x = .data[[i]], y = .data[[j]], color = .data[[color]], group = .data[[color]])) + geom_line() if (nrow(tmp) < 101) plot_list[[itt]] <- plot_list[[itt]] + geom_point() } else { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]], color = .data[[color]], group = .data[[color]])) + geom_line() } } if ("log_x" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + xlab(paste("log", i)) if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", j)) if ((dc[i] %in% c("factor", "date") || (!is.empty(dc_org[j]) && dc_org[j] == "factor")) && nrow(tmp) < nrow(dataset)) { if (exists("levs")) { if (j %in% names(levs) && !is.na(levs[j])) { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, " {", levs[j], "})") } else { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, ")") } } else { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, ")") } } itt <- itt + 1 } } } else if (type == "line-single") { itt <- 1 for (i in yvar) { if (color == "none") { plot_list[[itt]] <- ggplot(dataset, aes(x = seq_along(.data[[i]]), y = .data[[i]])) + geom_line(color = linecol) + labs(x = "") } else { plot_list[[itt]] <- ggplot(dataset, aes(x = seq_along(.data[[i]]), y = .data[[i]], color = .data[[color]], group = .data[[color]])) + geom_line() + labs(x = "") } itt <- itt + 1 } } else if (type == "bar") { itt <- 1 for (i in xvar) { if (!"factor" %in% dc[i]) dataset[[i]] %<>% as_factor() if ("log_x" %in% axes) axes <- sub("log_x", "", axes) for (j in yvar) { tbv <- if (is.null(byvar)) i else c(i, byvar) tmp <- dataset %>% group_by_at(.vars = tbv) %>% select_at(.vars = c(tbv, j)) %>% na.omit() %>% summarise_all(fun) colnames(tmp)[ncol(tmp)] <- j if ("sort" %in% axes && facet_row == "." && facet_col == ".") { if ("flip" %in% axes) { tmp <- arrange_at(ungroup(tmp), .vars = j) } else { tmp <- arrange_at(ungroup(tmp), .vars = j, .funs = desc) } tmp[[i]] %<>% factor(., levels = unique(.)) } plot_list[[itt]] <- ggplot(tmp, aes(x = .data[[i]], y = .data[[j]])) + { if (fill == "none") { geom_bar(stat = "identity", position = "dodge", alpha = alpha, fill = fillcol) } else { geom_bar(stat = "identity", position = "dodge", alpha = alpha) } } if (!custom && (fill == "none" || fill == i)) { plot_list[[itt]] <- plot_list[[itt]] + theme(legend.position = "none") } if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", j)) if (dc[i] %in% c("factor", "integer", "date") && nrow(tmp) < nrow(dataset)) { if (exists("levs")) { if (j %in% names(levs) && !is.na(levs[j])) { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, " {", levs[j], "})") } else { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, ")") } } else { plot_list[[itt]]$labels$y %<>% paste0(., " (", fun, ")") } } itt <- itt + 1 } } } else if (type == "box") { itt <- 1 for (i in xvar) { if (!"factor" %in% dc[i]) dataset[[i]] %<>% as_factor for (j in yvar) { if (color == "none") { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]])) + geom_boxplot(alpha = alpha, fill = fillcol, outlier.color = pointcol, color = linecol) } else { plot_list[[itt]] <- ggplot(dataset, aes(x = .data[[i]], y = .data[[j]], fill = .data[[color]])) + geom_boxplot(alpha = alpha) } if (!custom && (color == "none" || color == i)) { plot_list[[itt]] <- plot_list[[itt]] + theme(legend.position = "none") } if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", j)) itt <- itt + 1 } } } else if (type == "box-single") { itt <- 1 for (i in xvar) { if (color == "none") { plot_list[[itt]] <- dataset %>% ggplot(aes(x = "", y = .data[[i]])) + geom_boxplot(alpha = alpha, fill = fillcol, outlier.color = pointcol, color = linecol) + scale_x_discrete(labels = NULL, breaks = NULL) + labs(x = "") } else { plot_list[[itt]] <- dataset %>% ggplot(aes(x = "", y = .data[[i]], fill = color)) + geom_boxplot(alpha = alpha) } if (!custom && (color == "none" || color == i)) { plot_list[[itt]] <- plot_list[[itt]] + theme(legend.position = "none") } if ("log_y" %in% axes) plot_list[[itt]] <- plot_list[[itt]] + ylab(paste("log", i)) itt <- itt + 1 } } if (facet_row != "." || facet_col != ".") { facets <- if (facet_row == ".") { paste("~", facet_col) } else { paste(facet_row, "~", facet_col) } scl <- if ("scale_y" %in% axes) "free_y" else "fixed" facet_fun <- if (facet_row == ".") facet_wrap else facet_grid for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + facet_fun(as.formula(facets), scales = scl) } } if (color != "none") { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + aes(color = .data[[color]]) } } if (size != "none") { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + aes(size = .data[[size]]) } } if (fill != "none") { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + aes(fill = .data[[fill]]) } } if ((length(xlim) == 2 && is.numeric(xlim)) && (length(ylim) == 2 && is.numeric(ylim))) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + coord_cartesian(xlim = xlim, ylim = ylim) } } else if (length(xlim) == 2 && is.numeric(xlim)) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + coord_cartesian(xlim = xlim) } } else if (length(ylim) == 2 && is.numeric(ylim)) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + coord_cartesian(ylim = ylim) } } if ("jitter" %in% check) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + geom_jitter(alpha = alpha, position = position_jitter(width = 0.4, height = 0.0)) } } if ("line" %in% check) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + sshhr(geom_smooth(method = "lm", alpha = 0.2, linewidth = .75, linetype = "dashed")) } } if ("loess" %in% check) { for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + sshhr(geom_smooth(span = smooth, method = "loess", alpha = 0.2, linewidth = .75, linetype = "dotdash")) } } if ("flip" %in% axes) { ## reverse legend ordering if available for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + coord_flip() + guides(fill = guide_legend(reverse = TRUE)) + guides(color = guide_legend(reverse = TRUE)) } } if (length(labs) > 0) { if (is.list(labs[[1]])) { for (i in 1:length(labs)) { plot_list[[i]] <- plot_list[[i]] + do.call(ggplot2::labs, labs[[i]]) } } else { plot_list[[1]] <- plot_list[[1]] + do.call(ggplot2::labs, labs) } } ## setting theme for (i in 1:length(plot_list)) { plot_list[[i]] <- plot_list[[i]] + get(theme)(base_size = ifelse(is.na(base_size), 11, base_size), base_family = base_family) } if (custom) { if (length(plot_list) == 1) plot_list[[1]] else plot_list } else { patchwork::wrap_plots(plot_list, ncol = min(length(plot_list), 2)) %>% (function(x) if (isTRUE(shiny)) x else print(x)) } } qscatter <- function(dataset, xvar, yvar, lev = "", fun = "mean", bins = 20) { if (is.character(dataset[[yvar]])) { dataset <- mutate_at(dataset, .vars = yvar, .funs = as.factor) } if (is.factor(dataset[[yvar]])) { if (is.empty(lev)) lev <- levels(pull(dataset, !!yvar))[1] dataset <- mutate_at(dataset, .vars = yvar, .funs = function(y) as.integer(y == lev)) lev <- paste0(" {", lev, "}") } else { lev <- "" } mutate_at(dataset, .vars = xvar, .funs = list(bins = ~ radiant.data::xtile(., bins))) %>% group_by(bins) %>% summarize_at(.vars = c(xvar, yvar), .funs = fun) %>% ggplot(aes(x = .data[[xvar]], y = .data[[yvar]])) + geom_point() + labs(y = paste0(yvar, " (", fun, lev, ")")) }