############################################
##  Missing Value Analysis
############################################
#' @export
missing <- function(dataset,
                    vars = NULL,
                    data_filter = "",
                    envir = parent.frame()) {
  
  # 1. 基础参数处理
  df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
  
  # 2. 数据提取
  # 2.1 获取完整数据集（不限制vars，保留所有字段）
  full_dataset <- get_data(
    dataset,
    filt = data_filter,  # 仅过滤行，不过滤列
    na.rm = FALSE,       # 保留缺失值
    envir = envir
  )
  
  # 2.2 从完整数据集中筛选出用户选中的变量
  if (!is.null(vars) && length(vars) > 0) {
    # 校验选中变量是否存在于完整数据集
    invalid_vars <- setdiff(vars, colnames(full_dataset))
    if (length(invalid_vars) > 0) {
      stop(paste(i18n$t("Variables not found in dataset:"), paste(invalid_vars, collapse = ", ")), call. = FALSE)
    }
    dataset <- full_dataset[, vars, drop = FALSE]  # 选中变量的子集
  } else {
    # 未选变量时，默认分析所有变量
    dataset <- full_dataset
    vars <- colnames(dataset)
  }
  
  # 3. 数据校验（仅校验完整数据集非空）
  if (nrow(full_dataset) == 0) stop(i18n$t("Dataset is empty."), call. = FALSE)
  
  # 4. 统计计算
  # 4.1 整体缺失统计
  overall_stats <- tibble::tibble(
    total_samples = nrow(dataset),
    complete_samples = sum(stats::complete.cases(dataset)),  # 无任何缺失的样本数
    missing_samples = nrow(dataset) - sum(stats::complete.cases(dataset)),  # 含缺失的样本数
    missing_sample_pct = round((missing_samples / total_samples) * 100, 2)
  )
  
  # 4.2 变量级缺失统计
  var_stats <- purrr::map_dfr(vars, function(var) {
    var_data <- dataset[[var]]
    n_missing <- sum(is.na(var_data))
    tibble::tibble(
      variable = var,
      var_type = class(var_data)[1],  # 变量类型
      total_samples = length(var_data),
      missing_count = n_missing,
      missing_pct = round((n_missing / length(var_data)) * 100, 2),
      valid_count = length(var_data) - n_missing,
      valid_pct = round(((length(var_data) - n_missing) / length(var_data)) * 100, 2)
    )
  })
  
  # 4.3 单个变量缺失样本的填充率详情
  missing_sample_fill_rate <- purrr::map(vars, function(target_var) {
    # 1. 找到目标变量（选中的）在完整数据集中的缺失行索引
    missing_row_idx <- which(is.na(full_dataset[[target_var]]))
    if (length(missing_row_idx) == 0) {
      return(list(
        target_variable = target_var,
        missing_sample_count = 0,
        fill_rate_details = tibble::tibble(variable = character(0), fill_count = integer(0), fill_pct = numeric(0))
      ))
    }
    # 2. 提取缺失样本的完整数据集
    missing_samples_data <- full_dataset[missing_row_idx, , drop = FALSE]
    # 3. 遍历完整数据集的所有字段计算填充率
    fill_rate <- purrr::map_dfr(colnames(full_dataset), function(var) {
      n_valid <- sum(!is.na(missing_samples_data[[var]]))
      tibble::tibble(
        variable = var,          
        fill_count = n_valid,    
        fill_pct = round((n_valid / nrow(missing_samples_data)) * 100, 2)
      )
    })
    list(
      target_variable = target_var,
      missing_sample_count = length(missing_row_idx),
      fill_rate_details = fill_rate
    )
  })
  names(missing_sample_fill_rate) <- vars
  
  # 5. 结果打包
  out <- structure(
    list(
      df_name = df_name,
      vars = vars,
      data_filter = if (data_filter == "") "None" else data_filter,
      overall_stats = overall_stats,
      var_stats = var_stats,
      missing_sample_fill_rate = missing_sample_fill_rate,
      raw_data = dataset
    ),
    class = "missing"
  )
  out
}

# ------------------------------
# Summary方法：展示缺失值统计结果
# ------------------------------
#' @export
summary.missing <- function(object, dec = 2, ...) {
  # 1. 基础信息
  cat(i18n$t("Missing Value Analysis Results\n"))
  cat(i18n$t("Data         :"), object$df_name, "\n")
  cat(i18n$t("Variables    :"), paste(object$vars, collapse = ", "), "\n")
  cat(i18n$t("Filter       :"), object$data_filter, "\n\n")
  
  # 2. 整体缺失统计
  cat("=== 1. Overall Missing Statistics ===\n")
  overall_formatted <- object$overall_stats %>%
    dplyr::mutate(
      missing_sample_pct = paste0(missing_sample_pct, "%")
    ) %>%
    dplyr::rename(
      "Total Samples" = total_samples,
      "Complete Samples" = complete_samples,
      "Samples with Missing Values" = missing_samples,
      "Missing Sample %" = missing_sample_pct
    )
  print(overall_formatted, row.names = FALSE, right = FALSE)
  cat("\n")
  
  # 3. 变量级缺失统计
  cat("=== 2. Variable-wise Missing Statistics ===\n")
  var_formatted <- object$var_stats %>%
    dplyr::mutate(
      missing_pct = paste0(missing_pct, "%"),
      valid_pct = paste0(valid_pct, "%")
    ) %>%
    dplyr::rename(
      "Variable" = variable,
      "Variable Type" = var_type,
      "Total Samples" = total_samples,
      "Missing Count" = missing_count,
      "Missing %" = missing_pct,
      "Valid Count" = valid_count,
      "Valid %" = valid_pct
    )
  print(var_formatted, row.names = FALSE, right = FALSE)
  cat("\n")
  
  # 4. 单个变量缺失样本的填充率详情
  cat("=== 3. Fill Rate Details of Samples with Missing Values (By Variable) ===\n")
  for (var in object$vars) {
    fill_rate_data <- object$missing_sample_fill_rate[[var]]
    if (fill_rate_data$missing_sample_count == 0) {
      cat(paste("•", var, ":", i18n$t("No missing values, no fill rate details.\n")))
      next
    }
    cat(paste("• Target Variable:", var, "|", i18n$t("Missing Sample Count:"), fill_rate_data$missing_sample_count, "\n"))
    fill_formatted <- fill_rate_data$fill_rate_details %>%
      dplyr::mutate(fill_pct = paste0(fill_pct, "%")) %>%
      dplyr::rename(
        "Variable" = variable,
        "Fill Count" = fill_count,
        "Fill %" = fill_pct
      )
    print(fill_formatted, row.names = FALSE, right = FALSE)
    cat("\n")
  }
  
  invisible(object)
}

# ------------------------------
# Plot方法：生成缺失值可视化图表
# ------------------------------
#' @export
plot.missing <- function(x,
                         plots = c("heatmap", "barplot"),
                         shiny = FALSE, ...) {
  # 1. 基础校验
  if (length(plots) == 0) {
    return(ggplot2::ggplot() +
             ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No plots selected")) +
             ggplot2::theme_void())
  }
  
  plot_list <- list()
  raw_data <- x$raw_data
  
  # 2. 缺失热图
  if ("heatmap" %in% plots) {
    # 步骤1：数据预处理（先校验数据，再安全抽样）
    if (nrow(raw_data) == 0) {
      # 空数据时返回提示图
      heatmap_data <- ggplot2::ggplot() +
        ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No data to display")) +
        ggplot2::theme_void()
      plot_list[["heatmap"]] <- heatmap_data
      next
    }
    
    # 步骤2：安全抽样（优先保留缺失样本，避免weight_by报错）
    # 2.1 分离缺失样本和有效样本
    missing_sample_idx <- which(!stats::complete.cases(raw_data))  # 有缺失值的样本索引
    valid_sample_idx <- which(stats::complete.cases(raw_data))     # 无缺失值的样本索引
    
    # 2.2 抽样逻辑：优先保留所有缺失样本，再随机补有效样本至3000
    max_samples <- 3000
    selected_idx <- c()
    # 先加所有缺失样本
    if (length(missing_sample_idx) > 0) {
      selected_idx <- c(selected_idx, missing_sample_idx)
    }
    # 再补有效样本（不超过max_samples）
    need_samples <- max_samples - length(selected_idx)
    if (need_samples > 0 && length(valid_sample_idx) > 0) {
      selected_valid <- sample(valid_sample_idx, size = min(need_samples, length(valid_sample_idx)))
      selected_idx <- c(selected_idx, selected_valid)
    }
    # 无缺失样本时，直接随机抽有效样本
    if (length(selected_idx) == 0) {
      selected_idx <- sample(1:nrow(raw_data), size = min(max_samples, nrow(raw_data)))
    }
    
    # 2.3 提取抽样后的数据并整理
    heatmap_prep <- raw_data[selected_idx, , drop = FALSE] %>%
      tibble::rowid_to_column("sample_id") %>%  # 新增样本ID
      tidyr::pivot_longer(cols = -sample_id, names_to = "variable", values_to = "value") %>%
      dplyr::mutate(is_missing = is.na(value))  # 标记缺失状态
    
    # 步骤3：绘制优化热图（无报错+高对比）
    heatmap_data <- ggplot2::ggplot(
      heatmap_prep, 
      ggplot2::aes(x = variable, y = reorder(sample_id, -sample_id), fill = is_missing)
    ) +
      # 核心1：矩形块+白色边框（区分度拉满）
      ggplot2::geom_tile(color = "white", size = 0.2) +
      # 核心2：高对比配色（缺失=亮红，有效=深灰）
      ggplot2::scale_fill_manual(
        values = c("FALSE" = "#2c3e50", "TRUE" = "#e74c3c"),
        labels = c("FALSE" = i18n$t("Valid"), "TRUE" = i18n$t("Missing")),
        name = i18n$t("Data Status"),
        drop = FALSE  # 强制显示两种状态，避免无缺失时图例消失
      ) +
      # 核心3：尺寸适配+标签优化
      ggplot2::theme_minimal() +
      ggplot2::theme(
        # 动态尺寸（避免压缩）
        plot.width = ggplot2::unit(min(12, length(colnames(raw_data)) * 0.8), "in"),
        plot.height = ggplot2::unit(min(8, nrow(heatmap_prep) * 0.006), "in"),
        # 标签优化
        axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
        axis.text.y = ggplot2::element_text(size = 6),
        axis.title = ggplot2::element_text(size = 11, face = "bold"),
        # 图例优化（置顶+横向）
        legend.position = "top",
        legend.direction = "horizontal",
        legend.key.width = ggplot2::unit(1.5, "cm"),
        legend.title = ggplot2::element_text(size = 10),
        legend.text = ggplot2::element_text(size = 9),
        # 去除网格线
        panel.grid = ggplot2::element_blank(),
        # 标题样式
        plot.title = ggplot2::element_text(hjust = 0.5, size = 14, face = "bold", margin = ggplot2::margin(b = 10)),
        plot.subtitle = ggplot2::element_text(hjust = 0.5, size = 11, color = "#666666")
      ) +
      # 标签设置
      ggplot2::labs(
        title = i18n$t("Missing Value Distribution Heatmap"),
        subtitle = i18n$t("Red = Missing, Dark Gray = Valid | Max 3000 samples (missing samples prioritized)"),
        x = i18n$t("Variables"),
        y = i18n$t("Sample ID")
      )
    
    plot_list[["heatmap"]] <- heatmap_data
  }
  
  
  # 3. 缺失条形图
  if ("barplot" %in% plots) {
    barplot <- NULL
    if (!"variable" %in% colnames(x$var_stats) || !"missing_pct" %in% colnames(x$var_stats)) {
      barplot <- ggplot2::ggplot() +
        ggplot2::annotate("text", x = 1, y = 1, label = paste("原始列名：", paste(colnames(x$var_stats), collapse = ", "))) +
        ggplot2::theme_void()
    } else {
      # 提取数据
      bar_data <- data.frame(
        variable = x$var_stats$variable,
        missing_pct = x$var_stats$missing_pct
      )
      bar_data$variable <- factor(bar_data$variable, levels = x$var_stats$variable)
      
      # ========== 加调试代码：打印bar_data内容 ==========
      cat("=== bar_data真实内容 ===\n")
      print(bar_data)
      cat("========================\n")
      
      # 画图
      barplot <- ggplot2::ggplot(bar_data, ggplot2::aes(x = variable, y = missing_pct, fill = variable)) +
        ggplot2::geom_bar(stat = "identity", alpha = 0.8) +
        ggplot2::scale_fill_brewer(palette = "Set2") +
        ggplot2::scale_y_continuous(
          limits = c(0, max(bar_data$missing_pct) * 1.2),  
          breaks = function(limits) seq(0, limits[2], by = max(limits[2]/10, 0.5)),  # 自动分刻度
          expand = c(0, 0)
        ) +
        ggplot2::geom_text(
          aes(label = round(missing_pct, 2)),  
          vjust = -0.3,  
          size = 2.5,    
          color = "black"
        ) +
        ggplot2::labs(
          x = "Variables",
          y = "Missing %",
          title = "Variable-wise Missing Percentage",
          fill = NULL
        ) +
        ggplot2::theme_minimal() +
        ggplot2::theme(
          plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
          axis.text.x = ggplot2::element_text(
            angle = if (nrow(bar_data) > 8) 60 else 45,  
            hjust = 1,
            size = if (nrow(bar_data) > 10) 7 else 8 
          ),
          axis.text.y = ggplot2::element_text(size = 8),
          legend.position = "none",  
          panel.grid.major.y = ggplot2::element_line(color = "gray90"),
          panel.grid.minor.y = ggplot2::element_blank(),
          panel.grid.major.x = ggplot2::element_blank()
        )
    }
    plot_list[["barplot"]] <- barplot
  }
  
  
  # 4. 组合图表
  combined_plot <- patchwork::wrap_plots(plot_list[plots], ncol = 1, guides = "collect")
  
  # 5. 输出
  if (shiny) {
    print(combined_plot)
    return(invisible(combined_plot))
  } else {
    return(combined_plot)
  }
}