############################################ ## Multigroup Difference Analysis (ANOVA/Kruskal-Wallis) ############################################ #' @export mda <- function(dataset, var, group, normality_type = c("overall", "by_group"), data_filter = "", envir = parent.frame()) { # 1. 基础参数处理 normality_type <- match.arg(normality_type, choices = c("overall", "by_group")) df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset)) # 2. 数据提取:只保留“因变量+分组变量” dataset <- get_data( dataset, vars = c(var, group), # 强制只取2个核心变量,剔除冗余列 filt = data_filter, na.rm = FALSE, # 先不删缺失值,后续统一过滤 envir = envir ) # 3. 数据校验 if (!var %in% colnames(dataset)) { stop(paste("因变量", var, "未在数据集中找到!"), call. = FALSE) } if (!group %in% colnames(dataset)) { stop(paste("分组变量", group, "未在数据集中找到!"), call. = FALSE) } if (!is.numeric(dataset[[var]])) { stop(paste("因变量", var, "必须是数值型(当前类型:", class(dataset[[var]]), ")!"), call. = FALSE) } # 4. 有效样本过滤:剔除任一变量缺失的样本 valid_indices <- !is.na(dataset[[var]]) & !is.na(dataset[[group]]) valid_data <- dataset[valid_indices, ] # 仅保留有效样本的2列数据 if (nrow(valid_data) == 0) { stop("无有效样本(所有样本的因变量/分组变量存在缺失值)!", call. = FALSE) } # 5. 分组变量处理:强制转因子+校验水平 valid_data[[group]] <- as.factor(valid_data[[group]]) # 强制转因子,避免字符型干扰 valid_levels <- length(levels(valid_data[[group]])) # 用levels()确保因子水平正确 if (valid_levels < 2) { stop(paste("分组变量有效水平不足2个(当前水平数:", valid_levels, "),无法执行检验!"), call. = FALSE) } # 6. 检验计算:调用辅助函数 homo_res <- run_homo_test(valid_data, var, group) # 方差齐性检验 norm_res <- run_norm_test(valid_data, var, group, normality_type) # 正态性检验 # 7. 绘图数据准备 plot_obj <- list( norm = list( data = valid_data[[var]], group_data = if (normality_type == "by_group") { # 把命名向量转为无命名列表,避免asJSON警告 lapply(split(valid_data[[var]], valid_data[[group]]), function(x) x) } else NULL, var = var, group = group, type = normality_type ), homo = list( data = valid_data, var = var, group = group ) ) # 8. 结果打包:对齐单独检验的输出结构 out <- structure( list( df_name = df_name, var = var, group = group, normality_type = normality_type, data_filter = if (data_filter == "") "None" else data_filter, valid_n = nrow(valid_data), # 有效样本量 homo_res = homo_res, # 方差齐性检验结果 norm_res = norm_res, # 正态性检验结果 plot_obj = plot_obj ), class = "mda" ) out } # ------------------------------ # 辅助函数1:方差齐性检验 # ------------------------------ run_homo_test <- function(valid_data, var, group) { x <- valid_data[[var]] g <- valid_data[[group]] res <- tibble::tibble(Test = character(), Statistic = numeric(), p.value = numeric()) # 1. Levene检验 if (requireNamespace("car", quietly = TRUE)) { tmp <- tryCatch( expr = car::leveneTest(x ~ g), error = function(e) { message(paste("Levene检验执行失败:", e$message)) return(NULL) } ) if (!is.null(tmp) && nrow(tmp) > 0) { res <- tibble::add_row(res, Test = "Levene", Statistic = as.numeric(tmp[["F value"]][1]), p.value = as.numeric(tmp[["Pr(>F)"]][1])) } } else { res <- tibble::add_row(res, Test = "Levene", Statistic = NA_real_, p.value = NA_real_) message("提示:需安装car包以运行Levene检验") } # 2. Bartlett检验 tmp <- tryCatch( expr = stats::bartlett.test(x, g), error = function(e) { message(paste("Bartlett检验执行失败:", e$message)) return(NULL) } ) if (!is.null(tmp)) { res <- tibble::add_row(res, Test = "Bartlett", Statistic = as.numeric(tmp$statistic), p.value = as.numeric(tmp$p.value)) } else { res <- tibble::add_row(res, Test = "Bartlett", Statistic = NA_real_, p.value = NA_real_) } # 3. Fligner检验 tmp <- tryCatch( expr = stats::fligner.test(x, g), error = function(e) { message(paste("Fligner检验执行失败:", e$message)) return(NULL) } ) if (!is.null(tmp)) { res <- tibble::add_row(res, Test = "Fligner", Statistic = as.numeric(tmp$statistic), p.value = as.numeric(tmp$p.value)) } else { res <- tibble::add_row(res, Test = "Fligner", Statistic = NA_real_, p.value = NA_real_) } res } # ------------------------------ # 辅助函数2:正态性检验 # ------------------------------ run_norm_test <- function(valid_data, var, group, normality_type) { x <- valid_data[[var]] g <- valid_data[[group]] res <- tibble::tibble(Group = character(), Test = character(), Statistic = numeric(), p.value = numeric()) # 1. 整体正态性检验 if (normality_type == "overall") { res <- dplyr::bind_rows(res, get_single_norm(x, group_label = "Overall")) } # 2. 按分组正态性检验 if (normality_type == "by_group") { for (level in levels(g)) { group_x <- x[g == level] res <- dplyr::bind_rows(res, get_single_norm(group_x, group_label = level)) } } res } # ------------------------------ # 辅助函数3:单组正态性检验 # ------------------------------ get_single_norm <- function(x, group_label) { res <- tibble::tibble(Group = group_label, Test = character(), Statistic = numeric(), p.value = numeric()) n <- length(x) # 1. Shapiro-Wilk检验 if (n >= 3 && n <= 5000) { tmp <- tryCatch( expr = stats::shapiro.test(x), error = function(e) { message(paste("Shapiro-Wilk检验(", group_label, ")失败:", e$message, sep = "")) return(NULL) } ) if (!is.null(tmp)) { res <- tibble::add_row(res, Group = group_label, Test = "Shapiro-Wilk", Statistic = tmp$statistic, p.value = tmp$p.value) } } else { res <- tibble::add_row(res, Group = group_label, Test = "Shapiro-Wilk", Statistic = NA_real_, p.value = NA_real_) message(paste("Shapiro-Wilk检验(", group_label, ")跳过:样本量需3-5000(当前n=", n, ")", sep = "")) } # 2. Lilliefors-KS检验 if (requireNamespace("nortest", quietly = TRUE)) { tmp <- tryCatch( expr = nortest::lillie.test(x), error = function(e) { message(paste("Lilliefors-KS检验(", group_label, ")失败:", e$message, sep = "")) return(NULL) } ) if (!is.null(tmp)) { res <- tibble::add_row(res, Group = group_label, Test = "Lilliefors-KS", Statistic = tmp$statistic, p.value = tmp$p.value) } else { res <- tibble::add_row(res, Group = group_label, Test = "Lilliefors-KS", Statistic = NA_real_, p.value = NA_real_) } } else { res <- tibble::add_row(res, Group = group_label, Test = "Lilliefors-KS", Statistic = NA_real_, p.value = NA_real_) message("提示:需安装nortest包以运行KS/AD检验") } # 3. Anderson-Darling检验 if (requireNamespace("nortest", quietly = TRUE)) { tmp <- tryCatch( expr = nortest::ad.test(x), error = function(e) { message(paste("Anderson-Darling检验(", group_label, ")失败:", e$message, sep = "")) return(NULL) } ) if (!is.null(tmp)) { res <- tibble::add_row(res, Group = group_label, Test = "Anderson-Darling", Statistic = tmp$statistic, p.value = tmp$p.value) } else { res <- tibble::add_row(res, Group = group_label, Test = "Anderson-Darling", Statistic = NA_real_, p.value = NA_real_) } } else { res <- tibble::add_row(res, Group = group_label, Test = "Anderson-Darling", Statistic = NA_real_, p.value = NA_real_) } res } # ------------------------------ # Summary方法 # ------------------------------ #' @export summary.mda <- function(object, dec = 3, ...) { # 1. 基础信息 cat("Multigroup Difference Analysis (ANOVA/KW)\n") cat("Data :", object$df_name, "\n") cat("Dependent var:", object$var, "(numeric)\n") cat("Group var :", object$group, "(factor,", length(levels(object$plot_obj$homo$data[[object$group]])), "levels)\n") cat("Normality test:", object$normality_type, "\n") cat("Valid samples:", object$valid_n, "\n\n") # 2. 正态性检验结果 cat("=== 1. Normality Test Results ===\n") if (nrow(object$norm_res) == 0) { cat(" No valid normality test results.\n\n") } else { norm_formatted <- object$norm_res %>% dplyr::mutate( Statistic = as.character(round(Statistic, dec)), # 转为字符型,统一类型 p.value = dplyr::case_when( is.na(p.value) ~ "", p.value < 0.001 ~ "<0.001", p.value < 0.01 ~ as.character(round(p.value, 3)), # 数值转字符 TRUE ~ as.character(round(p.value, 4)) # 数值转字符 ) ) %>% as.data.frame(stringsAsFactors = FALSE) print(norm_formatted, row.names = FALSE, right = FALSE) cat("\n") } # 3. 方差齐性检验结果 cat("=== 2. Homogeneity of Variance Results ===\n") if (nrow(object$homo_res) == 0) { cat(" No valid homogeneity test results.\n\n") } else { homo_formatted <- object$homo_res %>% dplyr::mutate( Statistic = as.character(round(Statistic, dec)), # 转为字符型,统一类型 p.value = dplyr::case_when( is.na(p.value) ~ "", p.value < 0.001 ~ "<0.001", p.value < 0.01 ~ as.character(round(p.value, 3)), # 数值转字符 TRUE ~ as.character(round(p.value, 4)) # 数值转字符 ) ) %>% as.data.frame(stringsAsFactors = FALSE) print(homo_formatted, row.names = FALSE, right = FALSE) cat("\n") } # 4. 结论提示 cat("=== 3. Interpretation Tips ===\n") cat("• 正态性:p ≥ 0.05 → 满足正态性假设\n") cat("• 方差齐性:p ≥ 0.05 → 满足方差齐性假设\n") cat("• 若同时满足这两个假设 → 使用方差分析(ANOVA)\n") cat("• 若任一假设不满足 → 使用Kruskal-Wallis检验\n") invisible(object) } # ------------------------------ # Plot方法 # ------------------------------ #' @export plot.mda <- function(x, plots = c("norm_qq", "norm_hist", "homo_box"), shiny = FALSE, custom = FALSE, ...) { # 1. 基础校验 if (length(plots) == 0) { return(ggplot2::ggplot() + ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No plots selected")) + ggplot2::theme_void()) } plot_list <- list() var_name <- x$var group_name <- x$group # 2. 正态性检验图表 # 2.1 Q-Q图 if ("norm_qq" %in% plots) { if (x$normality_type == "overall") { p <- ggplot2::ggplot(data.frame(y = x$plot_obj$norm$data), ggplot2::aes(sample = y)) + ggplot2::stat_qq(color = "#2E86AB", size = 1) + ggplot2::stat_qq_line(color = "#A23B72", linetype = "dashed") + ggplot2::labs(x = "Theoretical Quantiles", y = paste("Empirical Quantiles (", var_name, ")", sep = ""), title = "Normality: Q-Q Plot (Overall)") + ggplot2::theme_minimal() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12)) plot_list[["norm_qq"]] <- p } else { # 按分组画QQ图 group_data <- x$plot_obj$norm$group_data for (level in names(group_data)) { p <- ggplot2::ggplot(data.frame(y = group_data[[level]]), ggplot2::aes(sample = y)) + ggplot2::stat_qq(color = "#2E86AB", size = 1) + ggplot2::stat_qq_line(color = "#A23B72", linetype = "dashed") + ggplot2::labs(x = "Theoretical Quantiles", y = paste("Empirical Quantiles (", var_name, ")", sep = ""), title = paste("Normality: Q-Q Plot (", level, ")", sep = "")) + ggplot2::theme_minimal() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12)) plot_list[[paste("norm_qq_", level, sep = "")]] <- p } } } # 2.2 直方图 if ("norm_hist" %in% plots) { if (x$normality_type == "overall") { p <- ggplot2::ggplot(data.frame(y = x$plot_obj$norm$data), ggplot2::aes(x = y)) + ggplot2::geom_histogram(fill = "#F18F01", alpha = 0.7, bins = 30) + ggplot2::labs(x = var_name, y = "Count", title = "Normality: Histogram (Overall)") + ggplot2::theme_minimal() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12)) plot_list[["norm_hist"]] <- p } else { # 按分组画直方图 group_data <- x$plot_obj$norm$group_data for (level in names(group_data)) { p <- ggplot2::ggplot(data.frame(y = group_data[[level]]), ggplot2::aes(x = y)) + ggplot2::geom_histogram(fill = "#F18F01", alpha = 0.7, bins = 30) + ggplot2::labs(x = var_name, y = "Count", title = paste("Normality: Histogram (", level, ")", sep = "")) + ggplot2::theme_minimal() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12)) plot_list[[paste("norm_hist_", level, sep = "")]] <- p } } } # 3. 方差齐性检验图表 if ("homo_box" %in% plots) { p <- ggplot2::ggplot(x$plot_obj$homo$data, ggplot2::aes(x = .data[[group_name]], y = .data[[var_name]], fill = .data[[group_name]])) + ggplot2::geom_boxplot(alpha = 0.7, show.legend = FALSE) + ggplot2::scale_fill_brewer(palette = "Set2") + ggplot2::labs(x = group_name, y = var_name, title = "Homogeneity: Boxplot by Group") + ggplot2::theme_minimal() + ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12)) plot_list[["homo_box"]] <- p } # 4. 组合图表 combined_plot <- patchwork::wrap_plots(plot_list, ncol = 1, guides = "collect") # 5. 输出 if (shiny) { print(combined_plot) return(invisible(combined_plot)) } else { return(combined_plot) } }