############################################ ## Normality test ############################################ # Batch normality tests for radiant.basics #' @export normality_test <- function(dataset, var, method = c("shapiro", "ks", "ad"), data_filter = "", envir = parent.frame()) { ## 1. 定义支持的检验方法 supported_methods <- c("shapiro", "ks", "ad") ## 2. 处理多选方法:过滤无效值+设置默认 method <- intersect(method, supported_methods) if (length(method) == 0) method <- "shapiro" method <- match.arg(method, choices = supported_methods, several.ok = TRUE) ## 3. 取数据 df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) dataset <- get_data(dataset, var, filt = data_filter, na.rm = TRUE, envir = envir) x <- dataset[[var]] if (!is.numeric(x)) stop(i18n$t("Variable must be numeric")) x <- x[!is.na(x)] # 剔除缺失 ## 4. 初始化结果表格 res <- tibble::tibble( Test = character(), Statistic = numeric(), p.value = numeric() ) ## 5. 逐方法计算 if ("shapiro" %in% method) { tmp <- tryCatch(stats::shapiro.test(x), error = function(e) { stop("Shapiro-Wilk 需要 3 ≤ n ≤ 5000,当前 n = ", length(x), "\n请换 KS 或 AD 方法。") }) res <- tibble::add_row(res, Test = "Shapiro-Wilk", Statistic = tmp$statistic, p.value = tmp$p.value) } if ("ks" %in% method) { if (requireNamespace("nortest", quietly = TRUE)) { tmp <- nortest::lillie.test(x) res <- tibble::add_row(res, Test = "Lilliefors-KS", Statistic = tmp$statistic, p.value = tmp$p.value) } } if ("ad" %in% method) { if (requireNamespace("nortest", quietly = TRUE)) { tmp <- nortest::ad.test(x) res <- tibble::add_row(res, Test = "Anderson-Darling", Statistic = tmp$statistic, p.value = tmp$p.value) } } ## 6. 样本描述 dat_summary <- tibble::tibble( mean = mean(x), n = length(x), n_missing = sum(is.na(dataset[[var]])), sd = sd(x), se = sd(x) / sqrt(length(x)) ) ## 7. 绘图对象 plot_obj <- list( qq = list(type = "qq", data = x), hist = list(type = "hist", data = x), pp = list(type = "pp", data = x), density = list(type = "density", data = x) ) ## 8. 打包返回 out <- list( df_name = df_name, var = var, method = method, data_filter = data_filter, res = res, dat_summary = dat_summary, x = x, plot_obj = plot_obj ) class(out) <- "normality_test" out } # Summary method #' @export summary.normality_test <- function(object, dec = 3, ...) { cat("Normality tests\n") cat("Data :", object$df_name, "\n") if (!is.empty(object$data_filter)) { cat("Filter :", gsub("\\n", "", object$data_filter), "\n") } cat("Variable :", object$var, "\n\n") ## 打印统计量表 object$res %>% mutate(p.value = format.pval(p.value, digits = 3, eps = 1e-4)) %>% as.data.frame(stringsAsFactors = FALSE) %>% format_df(dec = dec) %>% print(row.names = FALSE) cat("\n") } # Plot method #' @export plot.normality_test <- function(x, plots = c("qq", "hist"), shiny = FALSE, custom = FALSE, ...) { plot_list <- list() if ("qq" %in% plots) { plot_list[[which("qq" == plots)]] <- ggplot(data.frame(y = x$x), aes(sample = y)) + stat_qq() + stat_qq_line() } if ("hist" %in% plots) { plot_list[[which("hist" == plots)]] <- ggplot(data.frame(y = x$x), aes(y)) + geom_histogram(fill = "blue", bins = 30) } if ("pp" %in% plots) { n <- length(x$x) i <- 1:n p <- (i - 0.5) / n theoretical <- qnorm(p) empirical <- sort(scale(x$x)) plot_list[[which("pp" == plots)]] <- ggplot(data.frame(theoretical = theoretical, empirical = empirical), aes(theoretical, empirical)) + geom_point(colour = "blue") + geom_abline(intercept = 0, slope = 1, linetype = "dashed", colour = "red") + labs(x = "Theoretical quantiles", y = "Empirical quantiles", title = "P-P plot") + theme_minimal() } if ("density" %in% plots) { plot_list[[which("density" == plots)]] <- ggplot(data.frame(y = x$x), aes(y)) + geom_density(fill = "blue", alpha = 0.5) } if (length(plot_list) == 0) return(invisible()) patchwork::wrap_plots(plot_list, ncol = 1) %>% { if (shiny) print(.) else print(.) } invisible(x) }