#' Compare a sample proportion to a population proportion #' #' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant #' #' @param dataset Dataset #' @param var The variable selected for the proportion comparison #' @param lev The factor level selected for the proportion comparison #' @param comp_value Population value to compare to the sample proportion #' @param alternative The alternative hypothesis ("two.sided", "greater", or "less") #' @param conf_lev Span of the confidence interval #' @param test bionomial exact test ("binom") or Z-test ("z") #' @param data_filter Expression entered in, e.g., Data > View to filter the dataset in Radiant. The expression should be a string (e.g., "price > 10000") #' @param envir Environment to extract data from #' #' @return A list of variables used in single_prop as an object of class single_prop #' #' @examples #' single_prop(titanic, "survived") %>% str() #' single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less") %>% str() #' #' @seealso \code{\link{summary.single_prop}} to summarize the results #' @seealso \code{\link{plot.single_prop}} to plot the results #' #' @export single_prop <- function(dataset, var, lev = "", comp_value = 0.5, alternative = "two.sided", conf_lev = .95, test = "binom", data_filter = "", envir = parent.frame()) { df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, var, filt = data_filter, na.rm = FALSE, envir = envir) %>% mutate_all(as.factor) ## removing any missing values n_miss <- n_missing(dataset) dataset <- na.omit(dataset) levs <- levels(dataset[[var]]) if (lev != "") { if (lev %in% levs && levs[1] != lev) { dataset[[var]] %<>% as.character %>% as.factor() %>% relevel(lev) levs <- levels(dataset[[var]]) } } else { lev <- levs[1] } n <- nrow(dataset) ns <- sum(dataset == lev) p <- ns / n dat_summary <- data.frame( diff = p - comp_value, p = p, ns = ns, n = n, n_missing = n_miss, stringsAsFactors = FALSE ) %>% mutate( sd = sqrt(p * (1 - p)), se = sqrt(p * (1 - p) / n), me = se * qnorm(conf_lev / 2 + .5, lower.tail = TRUE) ) if (test == "z") { ## use z-test res <- sshhr(prop.test( ns, n, p = comp_value, alternative = alternative, conf.level = conf_lev, correct = FALSE )) res <- tidy(res) ## convert chi-square stat to a z-score res$statistic <- sqrt(res$statistic) * ifelse(res$estimate < comp_value, -1, 1) } else { ## use binom.test for exact res <- binom.test( ns, n, p = comp_value, alternative = alternative, conf.level = conf_lev ) res <- tidy(res) } # removing unneeded arguments rm(envir) as.list(environment()) %>% add_class("single_prop") } #' Summary method for the single_prop function #' #' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant #' #' @param object Return value from \code{\link{single_prop}} #' @param dec Number of decimals to show #' @param ... further arguments passed to or from other methods #' #' @examples #' result <- single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less") #' summary(result) #' #' @seealso \code{\link{single_prop}} to generate the results #' @seealso \code{\link{plot.single_prop}} to plot the results #' #' @export summary.single_prop <- function(object, dec = 3, ...) { if (object$test == "z") { cat("Single proportion test (z-test)\n") } else { cat("Single proportion test (binomial exact)\n") } cat("Data :", object$df_name, "\n") if (!is.empty(object$data_filter)) { cat("Filter :", gsub("\\n", "", object$data_filter), "\n") } cat("Variable :", object$var, "\n") cat("Level :", object$lev, "in", object$var, "\n") cat("Confidence:", object$conf_lev, "\n") hyp_symbol <- c( "two.sided" = "not equal to", "less" = "<", "greater" = ">" )[object$alternative] cat("Null hyp. : the proportion of", object$lev, "in", object$var, "=", object$comp_value, "\n") cat("Alt. hyp. : the proportion of", object$lev, "in", object$var, hyp_symbol, object$comp_value, "\n\n") ## determine lower and upper % for ci ci_perc <- ci_label(object$alternative, object$conf_lev) ## print summary statistics object$dat_summary[-1] %>% as.data.frame(stringsAsFactors = FALSE) %>% format_df(dec = dec, mark = ",") %>% print(row.names = FALSE) cat("\n") res <- object$res res <- bind_cols(data.frame(diff = object$dat_summary[["diff"]]), res[, -1]) %>% select(base::setdiff(colnames(.), c("parameter", "method", "alternative"))) if (object$test == "z") { names(res) <- c("diff", "z.value", "p.value", ci_perc[1], ci_perc[2]) res <- format_df(res, dec = dec, mark = ",") # restrict the number of decimals } else { names(res) <- c("diff", "ns", "p.value", ci_perc[1], ci_perc[2]) res <- format_df(mutate(res, ns = as.integer(res$ns)), dec = dec, mark = ",") # restrict the number of decimals } res$` ` <- sig_stars(res$p.value) if (res$p.value < .001) res$p.value <- "< .001" ## print statistics print(as.data.frame(res, stringsAsFactors = FALSE), row.names = FALSE) cat("\nSignif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1\n") } #' Plot method for the single_prop function #' #' @details See \url{https://radiant-rstats.github.io/docs/basics/single_prop.html} for an example in Radiant #' #' @param x Return value from \code{\link{single_prop}} #' @param plots Plots to generate. "bar" shows a bar chart of the data. The "simulate" chart shows the location of the sample proportion and the comparison value (comp_value). Simulation is used to demonstrate the sampling variability in the data under the null-hypothesis #' @param shiny Did the function call originate inside a shiny app #' @param custom Logical (TRUE, FALSE) to indicate if ggplot object (or list of ggplot objects) should be returned. This option can be used to customize plots (e.g., add a title, change x and y labels, etc.). See examples and \url{https://ggplot2.tidyverse.org/} for options. #' @param ... further arguments passed to or from other methods #' #' @examples #' result <- single_prop(titanic, "survived", lev = "Yes", comp_value = 0.5, alternative = "less") #' plot(result, plots = c("bar", "simulate")) #' #' @seealso \code{\link{single_prop}} to generate the result #' @seealso \code{\link{summary.single_prop}} to summarize the results #' #' @importFrom rlang .data #' #' @export plot.single_prop <- function(x, plots = "bar", shiny = FALSE, custom = FALSE, ...) { if (any(!plots %in% c("bar", "simulate"))) { stop("Available plot types for 'single_prop' are \"bar\" and \"simulate\"") } lev_name <- x$levs[1] plot_list <- list() if ("bar" %in% plots) { plot_list[[which("bar" == plots)]] <- ggplot(x$dataset, aes(x = .data[[x$var]], fill = .data[[x$var]])) + geom_bar(aes(y = after_stat(count) / sum(after_stat(count))), alpha = 0.5) + scale_y_continuous(labels = scales::percent) + theme(legend.position = "none") + labs( title = paste0("Single proportion: ", lev_name, " in ", x$var), y = "" ) } if ("simulate" %in% plots) { simdat <- rbinom(1000, prob = x$comp_value, x$n) %>% divide_by(x$n) %>% data.frame(stringsAsFactors = FALSE) %>% set_colnames(lev_name) cip <- ci_perc(simdat[[lev_name]], x$alternative, x$conf_lev) %>% set_names(NULL) bw <- simdat %>% range() %>% diff() %>% divide_by(20) # to avoid problems with levels that start with numbers or contain spaces # http://stackoverflow.com/questions/13445435/ggplot2-aes-string-fails-to-handle-names-starting-with-numbers-or-containing-s names(simdat) <- "col1" plot_list[[which("simulate" == plots)]] <- ggplot(simdat, aes(x = col1)) + geom_histogram(fill = "blue", binwidth = bw, alpha = 0.5) + geom_vline( xintercept = x$comp_value, color = "red", linetype = "solid", linewidth = 1 ) + geom_vline( xintercept = x$res$estimate, color = "black", linetype = "solid", linewidth = 1 ) + geom_vline(xintercept = cip, color = "red", linetype = "longdash", linewidth = .5) + labs( title = paste0("Simulated proportions if null hyp. is true (", lev_name, " in ", x$var, ")"), x = paste0("Level ", lev_name, " in variable ", x$var) ) } if (length(plot_list) > 0) { if (custom) { if (length(plot_list) == 1) plot_list[[1]] else plot_list } else { patchwork::wrap_plots(plot_list, ncol = 1) %>% (function(x) if (shiny) x else print(x)) } } }