descr_out <- function(descr, ret_type = "html") { ## if there is no data description if (is.empty(descr)) { return("") } ## if there is a data description and we want html output if (ret_type == "html") { markdown::mark_html(text = descr, template = FALSE, meta = list(css = ""), output = FALSE) } else { descr } } ## create an empty data.frame and return error message as description upload_error_handler <- function(objname, ret) { r_data[[objname]] <- data.frame(matrix(rep("", 12), nrow = 2), stringsAsFactors = FALSE) %>% set_attr("description", ret) } load_csv <- function(file, delim = ",", col_names = TRUE, dec = ".", n_max = Inf, saf = TRUE, safx = 30) { n_max <- if (is_not(n_max) || n_max < 0) Inf else n_max dataset <- sshhr(try( readr::read_delim( file, delim = delim, locale = readr::locale(decimal_mark = dec, grouping_mark = delim), col_names = col_names, n_max = n_max, trim_ws = TRUE ), silent = TRUE )) if (inherits(dataset, "try-error")) { i18n$t("#### There was an error loading the data. Please make sure the data are in csv format") } else { prb <- readr::problems(dataset) if (nrow(prb) > 0) { tab_big <- "class='table table-condensed table-hover' style='width:70%;'" rprob <- knitr::kable( prb[1:(min(nrow(prb):10)), , drop = FALSE], align = "l", format = "html", table.attr = tab_big, caption = i18n$t("Read issues (max 10 rows shown):") ) } else { rprob <- "" } if (saf) dataset <- to_fct(dataset, safx) as.data.frame(dataset, stringsAsFactors = FALSE) %>% { set_colnames(., fix_names(colnames(.))) } %>% set_attr("description", rprob) } } load_user_data <- function(fname, uFile, ext, header = TRUE, man_str_as_factor = TRUE, sep = ",", dec = ".", n_max = Inf, xlsx_sheet = 1, xlsx_header = TRUE) { filename <- basename(fname) fext <- tools::file_ext(filename) %>% tolower() ## switch extension if needed ext <- case_when( fext == ext ~ ext, fext == "rdata" ~ "rdata", fext == "rds" && ext == "rda" ~ "rds", fext == "rda" && ext == "rds" ~ "rda", fext == "txt" && ext == "csv" ~ "txt", fext == "tsv" && ext == "csv" ~ "tsv", fext %in% c("xls", "xlsx") ~ "xlsx", TRUE ~ ext ) ## objname is used as the name of the data.frame, make case insensitive objname <- sub(glue("\\.{ext}$"), "", filename, ignore.case = TRUE) ## if ext isn't in the filename nothing was replaced and so ... if (objname == filename && !fext %in% c("xls", "xlsx")) { ret <- glue(i18n$t('#### The filename extension "{fext}" does not match the specified \\ file-type "{ext}". Please specify the file type you are trying to upload')) upload_error_handler(objname, ret) ext <- "---" } ldir <- getOption("radiant.launch_dir", default = radiant.data::find_home()) pdir <- getOption("radiant.project_dir", default = ldir) cmd <- NULL pp <- suppressMessages( radiant.data::parse_path( uFile, pdir = pdir, chr = "\"", mess = FALSE ) ) ## can't have spaces, dashes, etc. in objectname objname <- radiant.data::fix_names(objname) if (ext %in% c("rda", "rdata")) { ## objname will hold the name of the object(s) inside the R datafile robjname <- try(load(uFile), silent = TRUE) if (inherits(robjname, "try-error")) { upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in rda format.")) } else if (length(robjname) > 1) { if (sum(robjname %in% c("r_state", "r_data", "r_info")) > 1) { upload_error_handler(objname, i18n$t("#### To restore state select 'radiant state file' from the 'Load data of type' drowdown before loading the file")) ## need to remove the local copies of r_state, r_data, and r_info suppressWarnings(rm(r_state, r_data, r_info)) } else { upload_error_handler(objname, i18n$t("#### More than one R object contained in the data.")) } } else { r_data[[objname]] <- as.data.frame(get(robjname), stringsAsFactors = FALSE) cmd <- glue("{objname} <- load({pp$rpath}) %>% get()") } } else if (ext == "rds") { ## objname will hold the name of the object(s) inside the R datafile robj <- try(readRDS(uFile), silent = TRUE) if (inherits(robj, "try-error")) { upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in rds format.")) } else { r_data[[objname]] <- as.data.frame(robj, stringsAsFactors = FALSE) cmd <- glue("{objname} <- readr::read_rds({pp$rpath})") } } else if (ext == "parquet") { if (!requireNamespace("arrow", quietly = TRUE)) { stop(i18n$t("The 'arrow' package is not installed. Please install it and try again.")) upload_error_handler(objname, i18n$t("#### The arrow package required to work with data in parquet format is not installed. Please use install.packages('arrow')")) } else { robj <- arrow::read_parquet(uFile) # %>% set_attr("description", feather::feather_metadata(uFile)$description) if (inherits(robj, "try-error")) { upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in parquet format.")) } else { r_data[[objname]] <- as.data.frame(robj, stringsAsFactors = FALSE) cmd <- glue("{objname} <- arrow::read_parquet({pp$rpath})") } } }else if (ext == "xlsx") { if (!requireNamespace("readxl", quietly = TRUE)) { upload_error_handler(objname, i18n$t("#### 需要安装 readxl 包")) return() } # 1. 获取所有 Sheet all_sheets <- try(readxl::excel_sheets(path = uFile), silent = TRUE) if (inherits(all_sheets, "try-error")) { upload_error_handler(objname, i18n$t("#### 无法解析 Excel 文件结构")) return() } loaded_count <- 0 # 2. 循环处理 for (sheet_name in all_sheets) { # 2.1 准备合法的变量名 clean_name <- make.names(sheet_name) if (clean_name == "") clean_name <- paste0("Sheet_", sample(1000, 1)) # 2.2 读取数据 raw_data <- try(readxl::read_excel( path = uFile, sheet = sheet_name, col_names = xlsx_header ), silent = TRUE) # 2.3 卫语句:跳过无效数据(空表或读取错误) if (inherits(raw_data, "try-error") || !is.data.frame(raw_data) || ncol(raw_data) == 0) { next } # 2.4 [关键] 立即执行 Radiant 标准数据清洗流程 # 必须在循环内做,因为我们不会走到函数底部的通用处理逻辑了 try({ # 清洗列名 colnames(raw_data) <- radiant.data::fix_names(colnames(raw_data)) # 转换因子 if (man_str_as_factor) raw_data <- radiant.data::to_fct(raw_data) }, silent = TRUE) # 2.5 入库 r_data[[clean_name]] <- as.data.frame(raw_data, stringsAsFactors = FALSE) # 2.6 生成并注册复现代码 (Reproducibility) # 注意:这里直接注册 register,不需要 cmd 变量在外部累加 curr_cmd <- glue::glue(' {clean_name} <- readxl::read_excel("{uFile}", sheet = "{sheet_name}", col_names = {xlsx_header}) %>% as.data.frame(stringsAsFactors = FALSE) %>% fix_names() {if (man_str_as_factor) paste0(clean_name, " <- radiant.data::to_fct(", clean_name, ")") else ""} register("{clean_name}") ') # 2.7 更新系统元数据 (Mimic bottom logic) if (exists(clean_name, envir = r_data) && !bindingIsActive(as.symbol(clean_name), env = r_data)) { shiny::makeReactiveBinding(clean_name, env = r_data) } r_info[[glue("{clean_name}_lcmd")]] <- curr_cmd r_info[["datasetlist"]] <- unique(c(clean_name, r_info[["datasetlist"]])) loaded_count <- loaded_count + 1 } if (loaded_count == 0) { upload_error_handler(objname, i18n$t("#### 文件中未发现有效的表格数据")) } # 直接返回 return() }else if (ext %in% c("tsv", "csv", "txt")) { r_data[[objname]] <- load_csv( uFile, delim = sep, col_names = header, n_max = n_max, dec = dec, saf = man_str_as_factor ) %>% (function(x) if (is.character(x)) upload_error_handler(objname, i18n$t("#### There was an error loading the data")) else x) n_max <- if (is_not(n_max) || n_max < 0) Inf else n_max if (ext == "csv" && sep == "," && dec == "." && header) { cmd <- glue("{objname} <- readr::read_csv({pp$rpath}, n_max = {n_max})") } else { cmd <- glue(' {objname} <- readr::read_delim( {pp$rpath}, delim = "{sep}", col_names = {header}, n_max = {n_max}, locale = readr::locale(decimal_mark = "{dec}", grouping_mark = "{sep}") )') } ## make sure all columns names are "fixed" cmd <- paste0(cmd, " %>%\n fix_names()") if (man_str_as_factor) cmd <- paste0(cmd, " %>%\n to_fct()") } else if (ext != "---") { ret <- glue(i18n$t("#### The selected filetype is not currently supported ({fext})")) upload_error_handler(objname, ret) } # =========================================================================== # 下面的代码只为单数据集流程(CSV, RDS, parquet)服务 # XLSX 流程已经在上面 return() 了,不会执行到这里 # =========================================================================== if (exists(objname, envir = r_data) && !bindingIsActive(as.symbol(objname), env = r_data)) { shiny::makeReactiveBinding(objname, env = r_data) } r_info[[glue("{objname}_descr")]] <- attr(r_data[[objname]], "description") if (!is.empty(cmd)) { cn <- colnames(r_data[[objname]]) fn <- radiant.data::fix_names(cn) if (!identical(cn, fn)) { colnames(r_data[[objname]]) <- fn cmd <- paste0(cmd, " %>%\n fix_names()") } cmd <- glue('{cmd}\nregister("{objname}")') } r_info[[glue("{objname}_lcmd")]] <- cmd r_info[["datasetlist"]] <- c(objname, r_info[["datasetlist"]]) %>% unique() } load_description <- function(fname, uFile, objname) { ldir <- getOption("radiant.launch_dir", default = radiant.data::find_home()) pdir <- getOption("radiant.project_dir", default = ldir) cmd <- NULL pp <- suppressMessages( radiant.data::parse_path( uFile, pdir = pdir, chr = "\"", mess = FALSE ) ) descr <- readLines(pp$path, warn = FALSE) %>% paste0(collapse = "\n") cmd <- glue("register(\"{objname}\", descr = paste0(readLines({pp$rpath}, warn = FALSE), collapse = \"\\n\"))") attr(r_data[[objname]], "description") <- descr r_info[[glue("{objname}_descr")]] <- descr r_info[[glue("{objname}_lcmd")]] <- sub(glue('register("{objname}")'), cmd, r_info[[glue("{objname}_lcmd")]], fixed = TRUE) }