update

4688178c · wuzekai · be466d32 · 4688178c · 4688178c · 4688178c
Commit 4688178c authored Dec 24, 2025 by wuzekai
Showing with 599 additions and 1 deletion

DESCRIPTION radiant.basics/DESCRIPTION +2 -1

NAMESPACE radiant.basics/NAMESPACE +3 -0

missing.R radiant.basics/R/missing.R +353 -0

missing_ui.R radiant.basics/inst/app/tools/analysis/missing_ui.R +241 -0

No files found.
--- a/radiant.basics/DESCRIPTION
+++ b/radiant.basics/DESCRIPTION
@@ -26,7 +26,8 @@ Imports:
    shiny.i18n,
    rlang (>= 1.0.6),
    ggpp,
-    nortest
+    nortest,
+    naniar
 Suggests:
    testthat (>= 2.0.0),
    pkgdown (>= 1.1.0),

--- a/radiant.basics/NAMESPACE
+++ b/radiant.basics/NAMESPACE
@@ -8,6 +8,7 @@ S3method(plot,cross_tabs)
 S3method(plot,goodness)
 S3method(plot,homo_variance_test)
 S3method(plot,mda)
+S3method(plot,missing)
 S3method(plot,normality_test)
 S3method(plot,outlier)
 S3method(plot,prob_binom)
@@ -30,6 +31,7 @@ S3method(summary,cross_tabs)
 S3method(summary,goodness)
 S3method(summary,homo_variance_test)
 S3method(summary,mda)
+S3method(summary,missing)
 S3method(summary,normality_test)
 S3method(summary,outlier)
 S3method(summary,prob_binom)
@@ -54,6 +56,7 @@ export(get_single_norm)
 export(goodness)
 export(homo_variance_test)
 export(mda)
+export(missing)
 export(normality_test)
 export(outlier)
 export(prob_binom)

--- a/radiant.basics/R/missing.R
+++ b/radiant.basics/R/missing.R
+############################################
+##  Missing Value Analysis
+############################################
+#' @export
+missing <- function(dataset,
+                    vars = NULL,
+                    data_filter = "",
+                    envir = parent.frame()) {
+  # 1. 基础参数处理
+  df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
+  # 2. 数据提取
+  # 2.1 获取完整数据集（不限制vars，保留所有字段）
+  full_dataset <- get_data(
+    dataset,
+    filt = data_filter,  # 仅过滤行，不过滤列
+    na.rm = FALSE,       # 保留缺失值
+    envir = envir
+  )
+  # 2.2 从完整数据集中筛选出用户选中的变量
+  if (!is.null(vars) && length(vars) > 0) {
+    # 校验选中变量是否存在于完整数据集
+    invalid_vars <- setdiff(vars, colnames(full_dataset))
+    if (length(invalid_vars) > 0) {
+      stop(paste(i18n$t("Variables not found in dataset:"), paste(invalid_vars, collapse = ", ")), call. = FALSE)
+    }
+    dataset <- full_dataset[, vars, drop = FALSE]  # 选中变量的子集
+  } else {
+    # 未选变量时，默认分析所有变量
+    dataset <- full_dataset
+    vars <- colnames(dataset)
+  }
+  # 3. 数据校验（仅校验完整数据集非空）
+  if (nrow(full_dataset) == 0) stop(i18n$t("Dataset is empty."), call. = FALSE)
+  # 4. 统计计算
+  # 4.1 整体缺失统计
+  overall_stats <- tibble::tibble(
+    total_samples = nrow(dataset),
+    complete_samples = sum(stats::complete.cases(dataset)),  # 无任何缺失的样本数
+    missing_samples = nrow(dataset) - sum(stats::complete.cases(dataset)),  # 含缺失的样本数
+    missing_sample_pct = round((missing_samples / total_samples) * 100, 2)
+  )
+  # 4.2 变量级缺失统计
+  var_stats <- purrr::map_dfr(vars, function(var) {
+    var_data <- dataset[[var]]
+    n_missing <- sum(is.na(var_data))
+    tibble::tibble(
+      variable = var,
+      var_type = class(var_data)[1],  # 变量类型
+      total_samples = length(var_data),
+      missing_count = n_missing,
+      missing_pct = round((n_missing / length(var_data)) * 100, 2),
+      valid_count = length(var_data) - n_missing,
+      valid_pct = round(((length(var_data) - n_missing) / length(var_data)) * 100, 2)
+    )
+  })
+  # 4.3 单个变量缺失样本的填充率详情
+  missing_sample_fill_rate <- purrr::map(vars, function(target_var) {
+    # 1. 找到目标变量（选中的）在完整数据集中的缺失行索引
+    missing_row_idx <- which(is.na(full_dataset[[target_var]]))
+    if (length(missing_row_idx) == 0) {
+      return(list(
+        target_variable = target_var,
+        missing_sample_count = 0,
+        fill_rate_details = tibble::tibble(variable = character(0), fill_count = integer(0), fill_pct = numeric(0))
+      ))
+    }
+    # 2. 提取缺失样本的完整数据集
+    missing_samples_data <- full_dataset[missing_row_idx, , drop = FALSE]
+    # 3. 遍历完整数据集的所有字段计算填充率
+    fill_rate <- purrr::map_dfr(colnames(full_dataset), function(var) {
+      n_valid <- sum(!is.na(missing_samples_data[[var]]))
+      tibble::tibble(
+        variable = var,          
+        fill_count = n_valid,    
+        fill_pct = round((n_valid / nrow(missing_samples_data)) * 100, 2)
+      )
+    })
+    list(
+      target_variable = target_var,
+      missing_sample_count = length(missing_row_idx),
+      fill_rate_details = fill_rate
+    )
+  })
+  names(missing_sample_fill_rate) <- vars
+  # 5. 结果打包
+  out <- structure(
+    list(
+      df_name = df_name,
+      vars = vars,
+      data_filter = if (data_filter == "") "None" else data_filter,
+      overall_stats = overall_stats,
+      var_stats = var_stats,
+      missing_sample_fill_rate = missing_sample_fill_rate,
+      raw_data = dataset
+    ),
+    class = "missing"
+  )
+  out
+}
+# ------------------------------
+# Summary方法：展示缺失值统计结果
+# ------------------------------
+#' @export
+summary.missing <- function(object, dec = 2, ...) {
+  # 1. 基础信息
+  cat(i18n$t("Missing Value Analysis Results\n"))
+  cat(i18n$t("Data         :"), object$df_name, "\n")
+  cat(i18n$t("Variables    :"), paste(object$vars, collapse = ", "), "\n")
+  cat(i18n$t("Filter       :"), object$data_filter, "\n\n")
+  # 2. 整体缺失统计
+  cat("=== 1. Overall Missing Statistics ===\n")
+  overall_formatted <- object$overall_stats %>%
+    dplyr::mutate(
+      missing_sample_pct = paste0(missing_sample_pct, "%")
+    ) %>%
+    dplyr::rename(
+      "Total Samples" = total_samples,
+      "Complete Samples" = complete_samples,
+      "Samples with Missing Values" = missing_samples,
+      "Missing Sample %" = missing_sample_pct
+    )
+  print(overall_formatted, row.names = FALSE, right = FALSE)
+  cat("\n")
+  # 3. 变量级缺失统计
+  cat("=== 2. Variable-wise Missing Statistics ===\n")
+  var_formatted <- object$var_stats %>%
+    dplyr::mutate(
+      missing_pct = paste0(missing_pct, "%"),
+      valid_pct = paste0(valid_pct, "%")
+    ) %>%
+    dplyr::rename(
+      "Variable" = variable,
+      "Variable Type" = var_type,
+      "Total Samples" = total_samples,
+      "Missing Count" = missing_count,
+      "Missing %" = missing_pct,
+      "Valid Count" = valid_count,
+      "Valid %" = valid_pct
+    )
+  print(var_formatted, row.names = FALSE, right = FALSE)
+  cat("\n")
+  # 4. 单个变量缺失样本的填充率详情
+  cat("=== 3. Fill Rate Details of Samples with Missing Values (By Variable) ===\n")
+  for (var in object$vars) {
+    fill_rate_data <- object$missing_sample_fill_rate[[var]]
+    if (fill_rate_data$missing_sample_count == 0) {
+      cat(paste("•", var, ":", i18n$t("No missing values, no fill rate details.\n")))
+      next
+    }
+    cat(paste("• Target Variable:", var, "|", i18n$t("Missing Sample Count:"), fill_rate_data$missing_sample_count, "\n"))
+    fill_formatted <- fill_rate_data$fill_rate_details %>%
+      dplyr::mutate(fill_pct = paste0(fill_pct, "%")) %>%
+      dplyr::rename(
+        "Variable" = variable,
+        "Fill Count" = fill_count,
+        "Fill %" = fill_pct
+      )
+    print(fill_formatted, row.names = FALSE, right = FALSE)
+    cat("\n")
+  }
+  invisible(object)
+}
+# ------------------------------
+# Plot方法：生成缺失值可视化图表
+# ------------------------------
+#' @export
+plot.missing <- function(x,
+                         plots = c("heatmap", "barplot"),
+                         shiny = FALSE, ...) {
+  # 1. 基础校验
+  if (length(plots) == 0) {
+    return(ggplot2::ggplot() +
+             ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No plots selected")) +
+             ggplot2::theme_void())
+  }
+  plot_list <- list()
+  raw_data <- x$raw_data
+  # 2. 缺失热图
+  if ("heatmap" %in% plots) {
+    # 步骤1：数据预处理（先校验数据，再安全抽样）
+    if (nrow(raw_data) == 0) {
+      # 空数据时返回提示图
+      heatmap_data <- ggplot2::ggplot() +
+        ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No data to display")) +
+        ggplot2::theme_void()
+      plot_list[["heatmap"]] <- heatmap_data
+      next
+    }
+    # 步骤2：安全抽样（优先保留缺失样本，避免weight_by报错）
+    # 2.1 分离缺失样本和有效样本
+    missing_sample_idx <- which(!stats::complete.cases(raw_data))  # 有缺失值的样本索引
+    valid_sample_idx <- which(stats::complete.cases(raw_data))     # 无缺失值的样本索引
+    # 2.2 抽样逻辑：优先保留所有缺失样本，再随机补有效样本至3000
+    max_samples <- 3000
+    selected_idx <- c()
+    # 先加所有缺失样本
+    if (length(missing_sample_idx) > 0) {
+      selected_idx <- c(selected_idx, missing_sample_idx)
+    }
+    # 再补有效样本（不超过max_samples）
+    need_samples <- max_samples - length(selected_idx)
+    if (need_samples > 0 && length(valid_sample_idx) > 0) {
+      selected_valid <- sample(valid_sample_idx, size = min(need_samples, length(valid_sample_idx)))
+      selected_idx <- c(selected_idx, selected_valid)
+    }
+    # 无缺失样本时，直接随机抽有效样本
+    if (length(selected_idx) == 0) {
+      selected_idx <- sample(1:nrow(raw_data), size = min(max_samples, nrow(raw_data)))
+    }
+    # 2.3 提取抽样后的数据并整理
+    heatmap_prep <- raw_data[selected_idx, , drop = FALSE] %>%
+      tibble::rowid_to_column("sample_id") %>%  # 新增样本ID
+      tidyr::pivot_longer(cols = -sample_id, names_to = "variable", values_to = "value") %>%
+      dplyr::mutate(is_missing = is.na(value))  # 标记缺失状态
+    # 步骤3：绘制优化热图（无报错+高对比）
+    heatmap_data <- ggplot2::ggplot(
+      heatmap_prep, 
+      ggplot2::aes(x = variable, y = reorder(sample_id, -sample_id), fill = is_missing)
+    ) +
+      # 核心1：矩形块+白色边框（区分度拉满）
+      ggplot2::geom_tile(color = "white", size = 0.2) +
+      # 核心2：高对比配色（缺失=亮红，有效=深灰）
+      ggplot2::scale_fill_manual(
+        values = c("FALSE" = "#2c3e50", "TRUE" = "#e74c3c"),
+        labels = c("FALSE" = i18n$t("Valid"), "TRUE" = i18n$t("Missing")),
+        name = i18n$t("Data Status"),
+        drop = FALSE  # 强制显示两种状态，避免无缺失时图例消失
+      ) +
+      # 核心3：尺寸适配+标签优化
+      ggplot2::theme_minimal() +
+      ggplot2::theme(
+        # 动态尺寸（避免压缩）
+        plot.width = ggplot2::unit(min(12, length(colnames(raw_data)) * 0.8), "in"),
+        plot.height = ggplot2::unit(min(8, nrow(heatmap_prep) * 0.006), "in"),
+        # 标签优化
+        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
+        axis.text.y = ggplot2::element_text(size = 6),
+        axis.title = ggplot2::element_text(size = 11, face = "bold"),
+        # 图例优化（置顶+横向）
+        legend.position = "top",
+        legend.direction = "horizontal",
+        legend.key.width = ggplot2::unit(1.5, "cm"),
+        legend.title = ggplot2::element_text(size = 10),
+        legend.text = ggplot2::element_text(size = 9),
+        # 去除网格线
+        panel.grid = ggplot2::element_blank(),
+        # 标题样式
+        plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold", margin = ggplot2::margin(b = 10)),
+        plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 11, color = "#666666")
+      ) +
+      # 标签设置
+      ggplot2::labs(
+        title = i18n$t("Missing Value Distribution Heatmap"),
+        subtitle = i18n$t("Red = Missing, Dark Gray = Valid | Max 3000 samples (missing samples prioritized)"),
+        x = i18n$t("Variables"),
+        y = i18n$t("Sample ID")
+      )
+    plot_list[["heatmap"]] <- heatmap_data
+  }
+  # 3. 缺失条形图
+  if ("barplot" %in% plots) {
+    barplot <- NULL
+    if (!"variable" %in% colnames(x$var_stats) || !"missing_pct" %in% colnames(x$var_stats)) {
+      barplot <- ggplot2::ggplot() +
+        ggplot2::annotate("text", x = 1, y = 1, label = paste("原始列名：", paste(colnames(x$var_stats), collapse = ", "))) +
+        ggplot2::theme_void()
+    } else {
+      # 提取数据
+      bar_data <- data.frame(
+        variable = x$var_stats$variable,
+        missing_pct = x$var_stats$missing_pct
+      )
+      bar_data$variable <- factor(bar_data$variable, levels = x$var_stats$variable)
+      # ========== 加调试代码：打印bar_data内容 ==========
+      cat("=== bar_data真实内容 ===\n")
+      print(bar_data)
+      cat("========================\n")
+      # 画图
+      barplot <- ggplot2::ggplot(bar_data, ggplot2::aes(x = variable, y = missing_pct, fill = variable)) +
+        ggplot2::geom_bar(stat = "identity", alpha = 0.8) +
+        ggplot2::scale_fill_brewer(palette = "Set2") +
+        ggplot2::scale_y_continuous(
+          limits = c(0, max(bar_data$missing_pct) * 1.2),  
+          breaks = function(limits) seq(0, limits[2], by = max(limits[2]/10, 0.5)),  # 自动分刻度
+          expand = c(0, 0)
+        ) +
+        ggplot2::geom_text(
+          aes(label = round(missing_pct, 2)),  
+          vjust = -0.3,  
+          size = 2.5,    
+          color = "black"
+        ) +
+        ggplot2::labs(
+          x = "Variables",
+          y = "Missing %",
+          title = "Variable-wise Missing Percentage",
+          fill = NULL
+        ) +
+        ggplot2::theme_minimal() +
+        ggplot2::theme(
+          plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
+          axis.text.x = ggplot2::element_text(
+            angle = if (nrow(bar_data) > 8) 60 else 45,  
+            hjust = 1,
+            size = if (nrow(bar_data) > 10) 7 else 8 
+          ),
+          axis.text.y = ggplot2::element_text(size = 8),
+          legend.position = "none",  
+          panel.grid.major.y = ggplot2::element_line(color = "gray90"),
+          panel.grid.minor.y = ggplot2::element_blank(),
+          panel.grid.major.x = ggplot2::element_blank()
+        )
+    }
+    plot_list[["barplot"]] <- barplot
+  }
+  # 4. 组合图表
+  combined_plot <- patchwork::wrap_plots(plot_list[plots], ncol = 1, guides = "collect")
+  # 5. 输出
+  if (shiny) {
+    print(combined_plot)
+    return(invisible(combined_plot))
+  } else {
+    return(combined_plot)
+  }
+}
--- a/radiant.basics/inst/app/tools/analysis/missing_ui.R
+++ b/radiant.basics/inst/app/tools/analysis/missing_ui.R
+############################################
+##  Missing Value Analysis - UI
+############################################
+## 1. 标签
+missing_plots <- c("heatmap", "barplot")
+names(missing_plots) <- c(
+  i18n$t("Missing Heatmap"),
+  i18n$t("Missing Barplot")
+)
+## 2. 函数形参
+missing_args <- as.list(formals(missing))
+missing_args <- missing_args[names(missing_args) %in% c("dataset", "vars", "data_filter")]
+## 3. 输入收集
+missing_inputs <- reactive({
+  req(input$dataset)
+  inputs <- list(
+    dataset = input$dataset,
+    vars = input$missing_vars,  # 用户选择的变量
+    data_filter = if (input$show_filter) input$data_filter else "",
+    envir = r_data
+  )
+  # 校验参数完整性
+  for (arg in names(missing_args)) {
+    if (is.null(inputs[[arg]]) || length(inputs[[arg]]) == 0) {
+      inputs[[arg]] <- missing_args[[arg]]
+    }
+  }
+  inputs
+})
+## 4. 变量选择UI
+output$ui_missing_vars <- renderUI({
+  req(input$dataset)
+  current_data <- get_data(input$dataset, envir = r_data)
+  all_vars <- colnames(current_data)
+  if (length(all_vars) == 0) {
+    return(div(class = "alert alert-warning", i18n$t("No variables in dataset. Please select another dataset.")))
+  }
+  # 显示变量类型（数值型/分类型）
+  var_types <- sapply(current_data[, all_vars, drop = FALSE], function(col) class(col)[1])
+  choices <- setNames(nm = paste0(all_vars, " {", var_types, "}"), object = all_vars)
+  selectizeInput(
+    inputId = "missing_vars",
+    label = i18n$t("Select variables to analyze:"),
+    choices = choices,
+    selected = state_multiple("missing_vars", character(0)),
+    multiple = TRUE,
+    options = list(placeholder = i18n$t("Select one or more variables"), plugins = list("remove_button", "drag_drop"))
+  )
+})
+## 5. 主UI（Summary + Plot标签页）
+output$ui_missing <- renderUI({
+  req(input$dataset)
+  tagList(
+    wellPanel(
+      # Summary标签页：变量选择 + 统计结果
+      conditionalPanel(
+        condition = "input.tabs_missing == 'Summary'",
+        uiOutput("ui_missing_vars")
+      ),
+      # Plot标签页：可视化类型选择
+      conditionalPanel(
+        condition = "input.tabs_missing == 'Plot'",
+        selectizeInput(
+          inputId = "missing_plots",
+          label = i18n$t("Select plots:"),
+          choices = missing_plots,
+          selected = state_multiple("missing_plots", missing_plots, "barplot"),  # 默认选中条形图
+          multiple = TRUE,
+          options = list(placeholder = i18n$t("Select plot types"), plugins = list("remove_button", "drag_drop"))
+        )
+      )
+    ),
+    # 帮助与报告（和离群值分析保持一致）
+    help_and_report(
+      modal_title = i18n$t("Missing Value Analysis"),
+      fun_name = "missing",
+      help_file = inclMD(file.path(getOption("radiant.path.basics"), "app/tools/help/missing.md"))
+    )
+  )
+})
+## 6. 图表尺寸计算
+missing_plot_dims <- reactive({
+  req(.missing())
+  plot_count <- length(input$missing_plots)
+  var_count <- length(.missing()$vars)
+  # 基础高度
+  base_height_px <- if ("heatmap" %in% input$missing_plots) {
+    min(500 + (nrow(.missing()$raw_data) * 0.5), 1200)  # 样本越多，热图越高
+  } else {
+    400
+  }
+  total_height_px <- base_height_px * plot_count
+  # 限制最大/最小高度
+  total_height_px <- min(total_height_px, 2000)
+  total_height_px <- max(total_height_px, 500)
+  list(
+    width = 800,
+    height = total_height_px
+  )
+})
+missing_plot_width <- function() missing_plot_dims()$width
+missing_plot_height <- function() missing_plot_dims()$height
+## 7. 输出面板
+output$missing <- renderUI({
+  # 注册输出组件
+  register_print_output("summary_missing", ".summary_missing")
+  register_plot_output("plot_missing", ".plot_missing",
+                       height_fun = "missing_plot_height")
+  # 标签页布局（Summary + Plot）
+  missing_panels <- tabsetPanel(
+    id = "tabs_missing",
+    tabPanel(
+      title = i18n$t("Summary"),
+      value = "Summary",
+      verbatimTextOutput("summary_missing", placeholder = TRUE)
+    ),
+    tabPanel(
+      title = i18n$t("Plot"),
+      value = "Plot",
+      download_link("dlp_missing"),  # 下载按钮
+      plotOutput("plot_missing", height = "100%"),
+      style = "margin-top: 10px;"
+    )
+  )
+  # 集成到Data Quality菜单下
+  stat_tab_panel(
+    menu = i18n$t("Basics > Data Quality"),
+    tool = i18n$t("Missing Value Analysis"),
+    tool_ui = "ui_missing",
+    output_panels = missing_panels
+  )
+})
+## 8. 可用性检验
+missing_available <- reactive({
+  req(input$dataset)
+  current_data <- get_data(input$dataset, envir = r_data)
+  # 校验是否选择变量：未选则返回提示，阻止后续计算
+  if (not_available(input$missing_vars)) {
+    return(i18n$t("Please select at least one variable to analyze."))
+  }
+  # 校验变量是否存在
+  invalid_vars <- input$missing_vars[!input$missing_vars %in% colnames(current_data)]
+  if (length(invalid_vars) > 0) {
+    return(i18n$t(paste("Invalid variables: ", paste(invalid_vars, collapse = ", "), ". Please reselect.", sep = "")))
+  }
+  "available"
+})
+## 9. 计算核心
+.missing <- reactive({
+  req(missing_available() == "available")
+  do.call(missing, missing_inputs())
+})
+## 10. Summary输出
+.summary_missing <- reactive({
+  req(missing_available() == "available")
+  summary(.missing())
+})
+## 11. Plot输出
+.plot_missing <- reactive({
+  req(missing_available() == "available")
+  validate(need(input$missing_plots, i18n$t("Please select at least one plot type first.")))
+  withProgress(message = i18n$t("Generating missing value plots..."), value = 0.5, {
+    p <- plot(.missing(), plots = input$missing_plots, shiny = TRUE)
+    setProgress(value = 1)
+  })
+  p
+})
+## 12. 下载功能
+download_handler(
+  id = "dlp_missing",
+  fun = function(file) {
+    plot_obj <- .plot_missing()
+    width_in <- missing_plot_width() / 96
+    height_in <- missing_plot_height() / 96
+    ggsave(
+      filename = file,
+      plot = plot_obj,
+      width = width_in,
+      height = height_in,
+      device = "png",
+      dpi = 300,
+      limitsize = FALSE,
+      bg = "white"
+    )
+  },
+  fn = function() paste0(input$dataset, "_missing_value_analysis"),
+  type = "png",
+  caption = i18n$t("Save missing value plots")
+)
+## 13. 报告生成
+missing_report <- function() {
+  req(missing_available() == "available")
+  figs <- length(input$missing_plots) > 0
+  update_report(
+    inp_main = clean_args(missing_inputs(), missing_args),
+    fun_name = "missing",
+    inp_out = if (figs) list("", list(plots = input$missing_plots)) else list(""),
+    outputs = if (figs) c("summary", "plot") else "summary",
+    figs = figs,
+    fig.width = missing_plot_width(),
+    fig.height = missing_plot_height()
+  )
+}
+## 14. 截图功能
+observeEvent(input$missing_report, {
+  r_info[["latest_screenshot"]] <- NULL
+  missing_report()
+})
+observeEvent(input$missing_screenshot, {
+  r_info[["latest_screenshot"]] <- NULL
+  radiant_screenshot_modal("modal_missing_screenshot")
+})
+observeEvent(input$modal_missing_screenshot, {
+  missing_report()
+  removeModal()
+})