############################################
##  Outlier Analysis (IQR/Z-score Method)
############################################
#' @export
outlier <- function(dataset,
                    vars,
                    method = c("iqr", "zscore"),
                    iqr_multiplier = 1.5,
                    z_threshold = 3,
                    data_filter = "",
                    envir = parent.frame()) {
  
  # 1. 基础参数处理
  method <- match.arg(method, choices = c("iqr", "zscore"))
  df_name <- if (is_string(dataset)) dataset else deparse(substitute(dataset))
  
  # 2. 数据提取：仅保留选择的变量+过滤数据
  dataset <- get_data(
    dataset,
    vars = vars,
    filt = data_filter,
    na.rm = FALSE,  # 保留缺失值，后续标记为非离群值
    envir = envir
  )
  
  # 3. 数据校验
  if (length(vars) == 0) stop("Please select at least one numeric variable.", call. = FALSE)
  if (!all(vars %in% colnames(dataset))) {
    stop(paste("Variables not found in dataset:", paste(setdiff(vars, colnames(dataset)), collapse = ", ")), call. = FALSE)
  }
  
  # 4. 离群值计算
  outlier_results <- list()
  for (var in vars) {
    var_data <- dataset[[var]]
    valid_data <- var_data[!is.na(var_data)]  # 仅对非缺失值计算
    
    if (length(valid_data) < 5) {
      outlier_results[[var]] <- list(
        overview = tibble::tibble(
          var = var,
          n_total = length(var_data),
          n_valid = length(valid_data),
          n_outlier = 0,
          outlier_pct = 0,
          lower_bound = NA_real_,
          upper_bound = NA_real_
        ),
        # 强制deviation为character(0)
        details = tibble::tibble(
          row_idx = integer(0),
          value = numeric(0),
          deviation = character(0)  # 明确指定字符型空向量
        )
      )
      next
    }
    
    # 调用辅助函数计算离群值
    if (method == "iqr") {
      res <- calc_outlier_iqr(valid_data, multiplier = iqr_multiplier)
    } else {
      res <- calc_outlier_zscore(valid_data, threshold = z_threshold)
    }
    
    # 匹配原始行索引
    outlier_idx <- which(!is.na(var_data) & var_data %in% res$outlier_values)
    # 当无离群值时，deviation仍为character(0)
    deviation <- if (length(outlier_idx) == 0) {
      character(0)
    } else {
      ifelse(var_data[outlier_idx] < res$lower_bound, "Below Lower Bound", "Above Upper Bound")
    }
    
    # 整理结果
    outlier_results[[var]] <- list(
      overview = tibble::tibble(
        var = var,
        n_total = length(var_data),
        n_valid = length(valid_data),
        n_outlier = length(res$outlier_values),
        outlier_pct = round(length(res$outlier_values)/length(valid_data)*100, 2),
        lower_bound = round(res$lower_bound, 3),
        upper_bound = round(res$upper_bound, 3)
      ),
      details = tibble::tibble(
        row_idx = outlier_idx,
        value = var_data[outlier_idx],
        deviation = deviation 
      )
    )
  }
  
  # 5. 结果打包
  out <- structure(
    list(
      df_name = df_name,
      vars = vars,
      method = method,
      params = list(
        iqr_multiplier = if (method == "iqr") iqr_multiplier else NA,
        z_threshold = if (method == "zscore") z_threshold else NA
      ),
      data_filter = if (data_filter == "") "None" else data_filter,
      results = outlier_results,  # 每个变量的离群值结果
      raw_data = dataset  # 原始数据
    ),
    class = "outlier"
  )
  out
}

# ------------------------------
# 辅助函数1：IQR法计算离群值
# ------------------------------
calc_outlier_iqr <- function(data, multiplier = 1.5) {
  q1 <- stats::quantile(data, 0.25, na.rm = TRUE)
  q3 <- stats::quantile(data, 0.75, na.rm = TRUE)
  iqr_val <- q3 - q1
  lower <- q1 - multiplier * iqr_val
  upper <- q3 + multiplier * iqr_val
  outlier_values <- data[data < lower | data > upper]
  
  list(
    lower_bound = lower,
    upper_bound = upper,
    outlier_values = outlier_values
  )
}

# ------------------------------
# 辅助函数2：Z-score法计算离群值
# ------------------------------
calc_outlier_zscore <- function(data, threshold = 3) {
  mean_val <- mean(data, na.rm = TRUE)
  sd_val <- stats::sd(data, na.rm = TRUE)
  z_scores <- (data - mean_val) / sd_val
  lower <- mean_val - threshold * sd_val
  upper <- mean_val + threshold * sd_val
  outlier_values <- data[abs(z_scores) > threshold]
  
  list(
    lower_bound = lower,
    upper_bound = upper,
    outlier_values = outlier_values,
    mean = mean_val,
    sd = sd_val
  )
}

# ------------------------------
# Summary方法：展示离群值概览
# ------------------------------
#' @export
summary.outlier <- function(object, dec = 3, ...) {
  # 1. 基础信息
  cat("Outlier Analysis Results\n")
  cat("Data         :", object$df_name, "\n")
  cat("Variables    :", paste(object$vars, collapse = ", "), "(numeric)\n")
  cat("Method       :", if (object$method == "iqr") paste("IQR Method (Multiplier =", object$params$iqr_multiplier, ")") else paste("Z-score Method (Threshold =", object$params$z_threshold, ")"), "\n")
  cat("Filter       :", object$data_filter, "\n\n")
  
  # 2. 离群值概览表
  cat("=== 1. Outlier Overview ===\n")
  overview_df <- purrr::map_dfr(object$results, ~ .x$overview)
  overview_formatted <- overview_df %>%
    dplyr::mutate(
      outlier_pct = paste0(outlier_pct, "%"),
      lower_bound = as.character(round(lower_bound, dec)),
      upper_bound = as.character(round(upper_bound, dec))
    ) %>%
    dplyr::rename(
      "Variable" = var,
      "Total Samples" = n_total,
      "Valid Samples" = n_valid,
      "Outlier Count" = n_outlier,
      "Outlier %" = outlier_pct,
      "Lower Bound" = lower_bound,
      "Upper Bound" = upper_bound
    ) %>%
    as.data.frame(stringsAsFactors = FALSE)
  
  print(overview_formatted, row.names = FALSE, right = FALSE)
  cat("\n")
  
  # 3. 离群值明细提示
  cat("=== 2. Outlier Details (By Variable) ===\n")
  for (var in object$vars) {
    details <- object$results[[var]]$details
    if (nrow(details) == 0) {
      cat(paste("•", var, ": No outliers detected\n"))
    } else {
      cat(paste("•", var, ":", nrow(details), "outliers\n"))
      details_formatted <- details %>%
        dplyr::mutate(
          value = round(value, dec)
        ) %>%
        dplyr::rename(
          "Row Index" = row_idx,
          "Value" = value,
          "Deviation" = deviation
        )
      print(details_formatted, row.names = FALSE, right = FALSE)
      cat("\n")
    }
  }
  
  # 4. 方法说明
  cat("=== 3. Method Explanation ===\n")
  if (object$method == "iqr") {
    cat("• IQR Method: Outliers are values outside [Q1 - k×IQR, Q3 + k×IQR] (k =", object$params$iqr_multiplier, ")\n")
    cat("• Q1 = 25th percentile, Q3 = 75th percentile, IQR = Q3 - Q1\n")
  } else {
    cat("• Z-score Method: Outliers are values with |Z-score| >", object$params$z_threshold, "\n")
    cat("• Z-score = (Value - Mean) / Standard Deviation\n")
  }
  
  invisible(object)
}

# ------------------------------
# Plot方法：可视化离群值
# ------------------------------
#' @export
plot.outlier <- function(x,
                         plots = c("boxplot", "histogram", "scatter"),
                         shiny = FALSE, custom = FALSE, ...) {
  # 1. 基础校验
  if (length(plots) == 0) {
    return(ggplot2::ggplot() +
             ggplot2::annotate("text", x = 1, y = 1, label = i18n$t("No plots selected")) +
             ggplot2::theme_void())
  }
  
  plot_list <- list()
  vars <- x$vars
  raw_data <- x$raw_data %>%
    tidyr::pivot_longer(cols = all_of(vars), names_to = "variable", values_to = "value")  # 长格式便于分面
  
  # 2. 箱线图
  if ("boxplot" %in% plots) {
    raw_data <- raw_data %>%
      dplyr::mutate(variable = as.factor(variable))
    
    p <- ggplot2::ggplot(raw_data, ggplot2::aes(x = 1, y = value, fill = variable)) +  
      ggplot2::geom_boxplot(alpha = 0.7, outlier.color = "red", outlier.size = 2, show.legend = FALSE) +
      ggplot2::scale_fill_brewer(palette = "Set2") +
      ggplot2::facet_wrap(~variable, scales = "free_y", ncol = 2) +  
      ggplot2::labs(
        x = "", 
        y = i18n$t("Value"),  
        title = i18n$t("Outlier Detection: Boxplot (Per Variable)")  
      ) +
      ggplot2::theme_minimal() +
      ggplot2::theme(
        axis.text.x = ggplot2::element_blank(),  
        strip.text = ggplot2::element_text(size = 11),  
        plot.title = ggplot2::element_text(hjust = 0.5, size = 12),  
        panel.spacing = ggplot2::unit(1, "cm")  
      )
    plot_list[["boxplot"]] <- p
  }
  
  # 3. 直方图
  if ("histogram" %in% plots) {
    # 合并所有变量的界值
    bound_data <- purrr::map_dfr(x$results, function(res) {
      tibble::tibble(
        variable = res$overview$var,
        lower_bound = res$overview$lower_bound,
        upper_bound = res$overview$upper_bound
      )
    })
    
    p <- ggplot2::ggplot(raw_data, ggplot2::aes(x = value)) +
      ggplot2::geom_histogram(fill = "#4287f5", alpha = 0.7, bins = 30) +
      ggplot2::geom_vline(
        data = bound_data,
        ggplot2::aes(xintercept = lower_bound),
        color = "red", linetype = "dashed", linewidth = 1
      ) +
      ggplot2::geom_vline(
        data = bound_data,
        ggplot2::aes(xintercept = upper_bound),
        color = "red", linetype = "dashed", linewidth = 1
      ) +
      ggplot2::facet_wrap(~variable, scales = "free") +
      ggplot2::labs(x = i18n$t("Value"), y = i18n$t("Count"),
                    title = i18n$t("Outlier Detection: Histogram (Red Dashed = Thresholds)")) +
      ggplot2::theme_minimal() +
      ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12))
    plot_list[["histogram"]] <- p
  }
  
  # 4. 散点图
  if ("scatter" %in% plots) {
    if (length(vars) >= 2) {
      # 变量数≥2：正常生成散点图
      outlier_row_idx <- purrr::map(x$results, ~ .x$details$row_idx)
      outlier_row_idx <- unique(unlist(outlier_row_idx))  
      
      # 取前两个变量做散点图
      var1 <- vars[1]
      var2 <- vars[2]
      # 构建散点图数据
      scatter_data <- x$raw_data %>%
        dplyr::mutate(
          row_idx = dplyr::row_number(),  
          is_outlier = row_idx %in% outlier_row_idx  
        ) %>%
        dplyr::select(row_idx, all_of(c(var1, var2)), is_outlier)  
      
      p <- ggplot2::ggplot(scatter_data, ggplot2::aes(x = .data[[var1]], y = .data[[var2]], color = is_outlier)) +
        ggplot2::geom_point(alpha = 0.7, size = 1.5) +
        ggplot2::scale_color_manual(values = c("black", "red"), labels = c("Normal", "Outlier")) +
        ggplot2::labs(x = var1, y = var2, color = i18n$t("Type"),
                      title = i18n$t("Outlier Detection: Scatter Plot")) +
        ggplot2::theme_minimal() +
        ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5, size = 12))
    } else {
      p <- ggplot2::ggplot() +
        ggplot2::annotate("text", 
                          x = 1, y = 1, 
                          label = i18n$t("Scatter Plot requires at least 2 numeric variables.\nPlease select more variables."),
                          size = 4.5, color = "#666666") +
        ggplot2::labs(title = i18n$t("Outlier Detection: Scatter Plot")) +
        ggplot2::theme_minimal() +
        ggplot2::theme(
          plot.title = ggplot2::element_text(hjust = 0.5, size = 12),
          axis.text = ggplot2::element_blank(),
          axis.title = ggplot2::element_blank(),
          panel.grid = ggplot2::element_blank()
        )
    }
    plot_list[["scatter"]] <- p 
  }
  
  
  # 5. 组合图表
  combined_plot <- patchwork::wrap_plots(plot_list[plots], ncol = 1, guides = "collect")
  
  # 6. 输出
  if (shiny) {
    print(combined_plot)
    return(invisible(combined_plot))
  } else {
    return(combined_plot)
  }
}