update:更新了不同的代码生成大模型

d68fce7d · wuzekai · ab43003a · d68fce7d · d68fce7d · d68fce7d
Commit d68fce7d authored Dec 04, 2025 by wuzekai
4 changed files
--- a/radiant.quickgen/R/quickgen_metrics.R
+++ b/radiant.quickgen/R/quickgen_metrics.R
 # === 配置 ===
-OLLAMA_URL <- "http://172.29.2.110:8139/api/generate"  
+OLLAMA_URL <- "http://180.169.131.147:8139/api/generate"
 MODEL_ID <- "qwen3-coder:30b"

 # === 单次对话 ===
 #' @export
 metrics_completion <- function(user_prompt) {
-  # 构建 Ollama 请求体
-  req <- httr2::request(OLLAMA_URL) %>%
-    httr2::req_headers(
-      "Content-Type" = "application/json"  
-    ) %>%
-    httr2::req_body_json(list(
+  
+  Sys.unsetenv("http_proxy")
+  Sys.unsetenv("https_proxy")
+  
+  # 构建请求体
+  req_body <- list(
    model = MODEL_ID,
    prompt = user_prompt,
    stream = FALSE
-    ))
+  )
  
-  resp <- httr2::req_perform(req)
-  body <- httr2::resp_body_json(resp)
+  # 使用 curl 包发送请求
+  h <- curl::new_handle()
+  curl::handle_setheaders(h, "Content-Type" = "application/json")
+  curl::handle_setopt(h, postfields = jsonlite::toJSON(req_body, auto_unbox = TRUE))
+  curl::handle_setopt(h, timeout = 60)  
+  
+  con <- curl::curl(OLLAMA_URL, handle = h)
+  result <- readLines(con, warn = FALSE)
+  close(con)
+  
+  # 解析响应
+  body <- jsonlite::fromJSON(paste(result, collapse = ""))
  
-  # 解析 Ollama 响应
  if (is.null(body$response) || trimws(body$response) == "")
-    stop("Ollama API 返回空内容：", jsonlite::toJSON(body))
+    stop("Ollama API 返回空内容：", paste(result, collapse = ""))
  
-  body$response  # 返回 Ollama 生成的核心代码/结果
+  body$response
 }
+
+
 # === 构造发给模型的 Prompt ===
 #' @export
 build_metrics_prompt <- function(user_prompt, data_call) {
@@ -43,14 +54,15 @@ build_metrics_prompt <- function(user_prompt, data_call) {
   - 变量适配：严格匹配方法对变量类型的要求（如 t检验要求连续变量，卡方检验要求分类变量，回归分析因变量需连续），类型不匹配则触发“无法计算”。
   - 缺失值处理：所有涉及数据计算的函数必须加 na.rm=TRUE（方法不支持则除外）。
 3. 默认参数：用户未指定时，按科研规范设默认值并在结果中体现：
-   - 置信水平默认 0.95（conf.level=0.95）
-   - 多重比较调整方法默认 p.adjust.method=none
-   - 假设检验默认双侧检验（alternative=two.sided）
+   - 置信水平默认 0.95（conf.level='0.95'）
+   - 多重比较调整方法默认（p.adjust.method='none'）
+   - 假设检验默认双侧检验（alternative='two.sided'）
 〓 技术规范（确保结果结构化、可展示）〓
-1. 数据集已读入为：%s（直接用 df$列名 引用变量，无需重复读入数据）。
+1. 数据集已读入为：%s（直接用 df$列名 引用变量）。
 2. 结果输出要求：
-   - 优先用 broom::tidy() 或 broom::glance() 将统计结果转为结构化数据框（包含统计量、p值、置信区间、自由度、显著性标记等核心指标），再用 print() 输出。
-   - 若 broom 包不支持该方法（如部分复杂模型），直接 print(统计函数结果)，确保保留显著性标记（***、**、*）。
+   - 禁止使用broom::tidy()、data.frame()等函数将统计结果转为结构化数据框，必须保持原始统计结果
+   - 必须将统计结果的关键指标通过 cat() 函数逐行输出，确保每行一个指标，清晰可读。最后一行需要将统计的原始结果print出来。
+   - 输出必须包含以下上下文信息：数据集名称（来自进行统计计算的 dataset）、变量名称（如 age, charges等变量原名称）、检验类型、原假设、样本信息（n, 缺失值数量）、关键统计量（t, df, p, CI, 均值等）
 3. 函数规范：统计函数必须写全名+完整括号（如 t.test()、lm()、chisq.test()），禁止省略括号或简写。
 4. 变量校验：自动校验变量存在性和类型（如分组变量必须是因子/字符型，连续变量不能用于卡方检验），校验失败则输出“无法计算”。
 用户请求：%s",
@@ -74,9 +86,6 @@ metrics_generate <- function(prompt, dataset, envir = parent.frame()) {
                type    = "empty",
                auto_run = FALSE))
  }
-  
-  r_code <- paste0(data_call, "\n", r_code)
-  
  has_gg  <- grepl("ggplot\\(|geom_", r_code)
  has_tbl <- grepl("data\\.frame\\(|tibble\\(|tbl_summary|tableOne|CreateTableOne|t.test|lm|cor.test|anova", r_code)
  type    <- if (has_gg) "plot" else if (has_tbl) "table" else "text"
@@ -91,6 +100,11 @@ metrics_get_data_call <- function(dataset, envir) {
 }

 #' @export
+
 metrics_run_code <- function(r_code, envir = parent.frame()) {
+  output_lines <- capture.output({
    eval(parse(text = r_code), envir = envir)
+  })
+  paste(output_lines, collapse = "\n")
+  
 }
--- a/radiant.quickgen/inst/app/tools/analysis/quickgen_chat_ui.R
+++ b/radiant.quickgen/inst/app/tools/analysis/quickgen_chat_ui.R
@@ -104,38 +104,82 @@ output$field_info_display <- renderUI({

 # 获取并编码字段信息
 get_field_info <- function() {
-  # 1. 校验数据集是否存在
-  if (is.null(input$dataset) || !exists("r_data")) {
-    return(NULL)
-  }
+  if (is.null(input$dataset) || !exists("r_data")) return(NULL)
  
-  # 2. 尝试获取数据集
-  df <- tryCatch({
-    get(input$dataset, envir = r_data)
-  }, error = function(e) NULL)
+  df <- tryCatch(get(input$dataset, envir = r_data), error = function(e) NULL)
+  if (is.null(df) || !is.data.frame(df) || nrow(df) == 0) return(NULL)
  
-  # 3. 数据集为空则返回NULL
-  if (is.null(df) || !is.data.frame(df) || nrow(df) == 0) {
-    return(NULL)
+  fields_summary <- list()
+  
+  for (col in names(df)) {
+    x <- df[[col]]
+    cls <- class(x)[1]
+    
+    # 提取非 NA 的值
+    valid_x <- x[!is.na(x)]
+    
+    if (cls %in% c("numeric", "double")) {
+      if (length(valid_x) == 0) {
+        fields_summary[[col]] <- list(type = cls)
+      } else {
+        fields_summary[[col]] <- list(
+          type = cls,
+          min = round(min(valid_x), 3),
+          max = round(max(valid_x), 3),
+          mean = round(mean(valid_x), 3)
+        )
      }
      
-  # 4. 构建字段名-类型映射
-  fields_list <- list()
-  for (col_name in names(df)) {
-    fields_list[[col_name]] <- class(df[[col_name]])[1] 
+    } else if (cls == "integer") {
+      if (length(valid_x) == 0) {
+        fields_summary[[col]] <- list(type = cls, levels = list(), n_levels = 0L)
+      } else {
+        unique_vals <- sort(unique(valid_x))
+        n_unique <- length(unique_vals)
+        if (n_unique <= 10) {
+          levs_out <- unique_vals
+        } else {
+          levs_out <- c(unique_vals[1:10], "...")
+        }
+        fields_summary[[col]] <- list(
+          type = cls,
+          levels = levs_out,
+          n_levels = n_unique
+        )
      }
      
-  # 5. 构建JSON结构
-  json_struct <- list(
-    dataset_name = input$dataset,  
-    fields = fields_list          
+    } else {
+      # factor 或 character
+      if (length(valid_x) == 0) {
+        fields_summary[[col]] <- list(type = cls, levels = list(), n_levels = 0L)
+      } else {
+        if (is.factor(x)) {
+          # 只取实际出现的值
+          unique_vals <- sort(unique(as.character(valid_x)))
+        } else {
+          unique_vals <- sort(unique(as.character(valid_x)))
+        }
+        n_unique <- length(unique_vals)
+        if (n_unique <= 10) {
+          levs_out <- unique_vals
+        } else {
+          levs_out <- c(unique_vals[1:10], "...")
+        }
+        fields_summary[[col]] <- list(
+          type = cls,
+          levels = levs_out,
+          n_levels = n_unique
        )
+      }
+    }
+  }
  
-  # 6. JSON序列化+URL编码
-  URLencode(
-    jsonlite::toJSON(json_struct, auto_unbox = TRUE),  
-    reserved = TRUE  
+  json_struct <- list(
+    dataset_name = input$dataset,
+    n_rows = nrow(df),
+    fields = fields_summary
  )
+  URLencode(jsonlite::toJSON(json_struct, auto_unbox = TRUE, digits = 3), reserved = TRUE)
 }

 # 无数据时的UI

--- a/radiant.quickgen/inst/app/tools/analysis/quickgen_metrics_ui.R
+++ b/radiant.quickgen/inst/app/tools/analysis/quickgen_metrics_ui.R
@@ -2,182 +2,6 @@
 library(shinyjs)
 library(shinyAce)

-# 新增：统计结果文本格式化函数
-format_stat_text <- function(result, dataset_name) {
-  # 1. 处理“无法计算”的情况
-  if (is.character(result) && grepl("无法计算", result)) {
-    return(result)
-  }
-  
-  # 2. 单样本t检验（htest类）
-  if (inherits(result, "htest") && grepl("One Sample t-test", result$method)) {
-    # 提取关键指标
-    var_name <- gsub("^x = df\\$|^df\\$", "", as.character(result$data.name))
-    mean_val <- round(mean(result$data, na.rm = TRUE), 3)
-    n_val <- length(na.omit(result$data))
-    n_missing <- sum(is.na(result$data))
-    sd_val <- round(sd(result$data, na.rm = TRUE), 3)
-    se_val <- round(result$stderr, 3)
-    me_val <- round(se_val * qt(0.975, df = result$parameter), 3)  # 边际误差
-    diff_val <- round(result$estimate, 3)
-    t_val <- round(result$statistic, 3)
-    p_val <- if (result$p.value < 0.001) "< .001" else round(result$p.value, 4)
-    df_val <- result$parameter
-    ci_lower <- round(result$conf.int[1], 3)
-    ci_upper <- round(result$conf.int[2], 3)
-    conf_level <- round(attr(result$conf.int, "conf.level") * 100, 1)
-    # 显著性标记
-    signif <- if (p_val == "< .001") "***" else if (p_val <= 0.001) "***" else if (p_val <= 0.01) "**" else if (p_val <= 0.05) "*" else if (p_val <= 0.1) "." else " "
-    
-    # 组织文本（完全匹配示例格式）
-    text <- sprintf(paste0(
-      "Single mean test\n",
-      "Data      : %s \n",
-      "Variable  : %s \n",
-      "Confidence: %.1f%% \n",
-      "Null hyp. : the mean of %s = %.1f \n",
-      "Alt. hyp. : the mean of %s is not equal to %.1f \n",
-      "   mean     n n_missing     sd    se    me\n",
-      " %6.3f %5s         %d %6.3f %5.3f %5.3f\n",
-      "   diff    se t.value p.value   df   2.5%%  97.5%%    \n",
-      " %6.3f %5.3f %6.3f %8s %5.0f %6.3f %6.3f %s\n",
-      "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"
-    ),
-    dataset_name, var_name, conf_level, var_name, result$null.value,
-    var_name, result$null.value, mean_val, format(n_val, big.mark = ","),
-    n_missing, sd_val, se_val, me_val, diff_val, se_val, t_val, p_val,
-    df_val, ci_lower, ci_upper, signif
-    )
-    return(text)
-  }
-  
-  # 3. 独立样本t检验（htest类）
-  if (inherits(result, "htest") && grepl("Welch Two Sample t-test|Two Sample t-test", result$method)) {
-    # 提取分组变量和数值变量
-    data_parts <- strsplit(result$data.name, " ~ ")[[1]]
-    var_name <- data_parts[1]
-    group_var <- data_parts[2]
-    group_vals <- levels(factor(df[[group_var]]))
-    # 分组统计
-    group1_data <- df[[var_name]][df[[group_var]] == group_vals[1]]
-    group2_data <- df[[var_name]][df[[group_var]] == group_vals[2]]
-    mean1 <- round(mean(group1_data, na.rm = TRUE), 3)
-    mean2 <- round(mean(group2_data, na.rm = TRUE), 3)
-    n1 <- length(na.omit(group1_data))
-    n2 <- length(na.omit(group2_data))
-    n_missing1 <- sum(is.na(group1_data))
-    n_missing2 <- sum(is.na(group2_data))
-    sd1 <- round(sd(group1_data, na.rm = TRUE), 3)
-    sd2 <- round(sd(group2_data, na.rm = TRUE), 3)
-    se1 <- round(sd1 / sqrt(n1), 3)
-    se2 <- round(sd2 / sqrt(n2), 3)
-    me1 <- round(se1 * qt(0.975, df = n1 - 1), 3)
-    me2 <- round(se2 * qt(0.975, df = n2 - 1), 3)
-    # 检验结果
-    diff_val <- round(result$estimate[1] - result$estimate[2], 3)
-    t_val <- round(result$statistic, 3)
-    p_val <- if (result$p.value < 0.001) "< .001" else round(result$p.value, 4)
-    df_val <- round(result$parameter, 0)
-    ci_lower <- round(result$conf.int[1], 3)
-    ci_upper <- round(result$conf.int[2], 3)
-    conf_level <- round(attr(result$conf.int, "conf.level") * 100, 1)
-    signif <- if (p_val == "< .001") "***" else if (p_val <= 0.001) "***" else if (p_val <= 0.01) "**" else if (p_val <= 0.05) "*" else if (p_val <= 0.1) "." else " "
-    
-    # 组织文本
-    text <- sprintf(paste0(
-      "Pairwise mean comparisons (t-test)\n",
-      "Data      : %s \n",
-      "Variables : %s, %s \n",
-      "Samples   : independent \n",
-      "Confidence: %.1f%% \n",
-      "Adjustment: None \n",
-      "           mean     n n_missing     sd    se    me\n",
-      " %6s %6.3f %5s         %d %6.3f %5.3f %5.3f\n",
-      " %6s %6.3f %5s         %d %6.3f %5.3f %5.3f\n",
-      " Null hyp.               Alt. hyp.                  diff  p.value    \n",
-      " %s = %s   %s not equal to %s   %6.3f %8s  %s\n",
-      "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"
-    ),
-    dataset_name, group_var, var_name, conf_level,
-    group_vals[1], mean1, format(n1, big.mark = ","), n_missing1, sd1, se1, me1,
-    group_vals[2], mean2, format(n2, big.mark = ","), n_missing2, sd2, se2, me2,
-    group_vals[1], group_vals[2], group_vals[1], group_vals[2], diff_val, p_val, signif
-    )
-    return(text)
-  }
-  
-  # 4. 线性回归（lm类）
-  if (inherits(result, "lm")) {
-    # 提取模型信息
-    model_formula <- as.character(result$call$formula)
-    var_dep <- model_formula[2]
-    vars_indep <- gsub("^ ~ ", "", model_formula[1])
-    # 系数表
-    coef_sum <- summary(result)$coefficients
-    coef_text <- "            Estimate Std.Error t.value p.value    Signif.\n"
-    for (i in 1:nrow(coef_sum)) {
-      term <- rownames(coef_sum)[i]
-      est <- round(coef_sum[i, 1], 4)
-      se <- round(coef_sum[i, 2], 4)
-      t_val <- round(coef_sum[i, 3], 3)
-      p_val <- if (coef_sum[i, 4] < 0.001) "< .001" else round(coef_sum[i, 4], 4)
-      signif <- if (p_val == "< .001") "***" else if (p_val <= 0.001) "***" else if (p_val <= 0.01) "**" else if (p_val <= 0.05) "*" else if (p_val <= 0.1) "." else " "
-      coef_text <- paste0(coef_text, sprintf(" %12s %8.4f %8.4f %6.3f %8s  %3s\n", term, est, se, t_val, p_val, signif))
-    }
-    # 模型拟合度
-    r2 <- round(summary(result)$r.squared, 4)
-    r2_adj <- round(summary(result)$adj.r.squared, 4)
-    n_val <- nrow(result$model)
-    n_missing <- nrow(df) - n_val
-    
-    # 组织文本
-    text <- sprintf(paste0(
-      "Linear Regression Model\n",
-      "Data      : %s \n",
-      "Formula   : %s ~ %s \n",
-      "Dependent : %s \n",
-      "Independent: %s \n",
-      "Sample    : n = %s, n_missing = %d \n",
-      "Fit       : R² = %.4f, Adjusted R² = %.4f \n",
-      "%s",
-      "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"
-    ),
-    dataset_name, var_dep, vars_indep, var_dep, vars_indep,
-    format(n_val, big.mark = ","), n_missing, r2, r2_adj, coef_text
-    )
-    return(text)
-  }
-  
-  # 5. 其他统计方法（卡方检验、方差分析等，按示例风格扩展）
-  if (inherits(result, "htest") && grepl("Chi-squared test", result$method)) {
-    # 卡方检验文本格式化（类似逻辑，提取列联表、卡方值、p值等）
-    data_parts <- strsplit(result$data.name, " ~ ")[[1]]
-    var1 <- data_parts[1]
-    var2 <- data_parts[2]
-    contingency <- table(df[[var1]], df[[var2]], useNA = "no")
-    chi_val <- round(result$statistic, 3)
-    df_val <- result$parameter
-    p_val <- if (result$p.value < 0.001) "< .001" else round(result$p.value, 4)
-    signif <- if (p_val == "< .001") "***" else if (p_val <= 0.001) "***" else if (p_val <= 0.01) "**" else if (p_val <= 0.05) "*" else if (p_val <= 0.1) "." else " "
-    
-    text <- sprintf(paste0(
-      "Chi-squared Test of Independence\n",
-      "Data      : %s \n",
-      "Variables : %s (row), %s (column) \n",
-      "Contingency Table:\n%s\n",
-      "Test Statistic: Chi-squared = %.3f, df = %.0f, p-value = %s %s\n",
-      "Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1"
-    ),
-    dataset_name, var1, var2, capture.output(print(contingency)),
-    chi_val, df_val, p_val, signif
-    )
-    return(text)
-  }
-  
-  # 6. 直接输出原始结果
-  return(capture.output(print(result)))
-}
-
 ## ==================== 右下角浮框 ====================
 ui_metrics_progress <- tags$div(
  id = "metrics_progress_box",
@@ -188,7 +12,7 @@ ui_metrics_progress <- tags$div(
           background:#f5f5f5; color:#333;
           border:1px solid #337ab7; border-radius:4px;
           padding:10px 15px; box-shadow:0 2px 8px rgba(0,0,0,.25);",
-  tags$strong(i18n$t("AI calculating...")),
+  tags$strong(i18n$t("AI generating...")),
  tags$div(class = "progress",
           tags$div(class = "progress-bar progress-bar-striped active",
                    style = "width:100%"))
@@ -341,12 +165,12 @@ observeEvent(input$metrics_run_code, {
    # 分支2：所有统计结果转为结构化文本
    else {
      r_values$metrics_result_type <- "text"
-      # 调用格式化函数，生成示例风格的文本
-      formatted_text <- format_stat_text(result, dataset_name)
-      # 输出文本（保留换行和空格对齐）
-      output$metrics_result_text <- renderText(paste(formatted_text, collapse = "\n"))
+      if (is.null(result) || result == "") {
+        output$metrics_result_text <- renderText(i18n$t("No output generated."))
+      } else {
+        output$metrics_result_text <- renderText(result)
+      }
    }
-    
    r_values$metrics_result_ready <- TRUE
  }, error = function(e) {
    r_values$metrics_result_type <- "error"

--- a/radiant.quickgen/inst/app/tools/help/quickgen_metrics.md
+++ b/radiant.quickgen/inst/app/tools/help/quickgen_metrics.md
-xxxxxx
\ No newline at end of file
+> 大模型生成描述性统计（指标）
+
+## 使用方法
+
+以下是 `大模型生成描述性统计`的使用方法。
+
+1.`数据对象`必须要有，且必须和数据集中的名字一致（大小写也一致）
+
+2.`统计方法`必须要有，比如t检验、卡方检验等。
+
+3.`生成代码`如果有误，或者想要修改，可以点击`编辑`按钮对R代码进行修改，保存后点击`运行`按钮即可。
+
+## 示例
+
+**1. 单变量推断**
+
+- 对 `age` 进行单样本 t 检验，原假设总体均值为 35。
+
+- 检验 `bmi` 的平均值是否显著高于 25（单侧 t 检验）。
+
+**2. 两组比较**
+
+- 比较 `smoker` = "yes" 和 "no" 两组的 `charges` 均值差异（独立样本 t 检验）。
+- 使用 Wilcoxon 秩和检验，比较男性与女性（`sex`）的 `bmi` 中位数是否不同。
+
+**3.分类变量关联**
+
+- 执行卡方独立性检验，判断 `sex` 与 `smoker` 是否相关。
+- 分析 `region` 和 `smoker` 的列联表，进行卡方检验。
+
+**4.多组比较**
+
+- 对 `charges` 按 `region` 分组进行单因素方差分析（one-way ANOVA）。
+- 使用 Kruskal-Wallis 检验，比较不同 `children` 数量（0~5）对应的 `charges` 分布差异。
+
+**5.相关与回归**
+
+- 计算 `age` 与 `charges` 的 Pearson 相关系数并检验显著性。
+- 拟合线性回归模型、以 `charges` 为因变量，`age`、`bmi`、`children` 和 `smoker` 为自变量。
+- 在控制 `age` 和 `bmi` 的情况下，检验 `smoker` 对 `charges` 的偏回归系数是否显著（多元线性回归）。
+
+**6.交互效应**
+
+- 通过双因素方差分析，检验 `smoker` 与 `sex` 对 `charges` 是否存在交互作用。
\ No newline at end of file