descr_out <- function(descr, ret_type = "html") {
## if there is no data description
if (is.empty(descr)) {
return("")
}
## if there is a data description and we want html output
if (ret_type == "html") {
markdown::mark_html(text = descr, template = FALSE, meta = list(css = ""), output = FALSE)
} else {
descr
}
}
## create an empty data.frame and return error message as description
upload_error_handler <- function(objname, ret) {
r_data[[objname]] <- data.frame(matrix(rep("", 12), nrow = 2), stringsAsFactors = FALSE) %>%
set_attr("description", ret)
}
load_csv <- function(file, delim = ",", col_names = TRUE, dec = ".",
n_max = Inf, saf = TRUE, safx = 30) {
n_max <- if (is_not(n_max) || n_max < 0) Inf else n_max
dataset <- sshhr(try(
readr::read_delim(
file,
delim = delim, locale = readr::locale(decimal_mark = dec, grouping_mark = delim),
col_names = col_names, n_max = n_max, trim_ws = TRUE
),
silent = TRUE
))
if (inherits(dataset, "try-error")) {
i18n$t("#### There was an error loading the data. Please make sure the data are in csv format")
} else {
prb <- readr::problems(dataset)
if (nrow(prb) > 0) {
tab_big <- "class='table table-condensed table-hover' style='width:70%;'"
rprob <- knitr::kable(
prb[1:(min(nrow(prb):10)), , drop = FALSE],
align = "l",
format = "html",
table.attr = tab_big,
caption = i18n$t("Read issues (max 10 rows shown):")
)
} else {
rprob <- ""
}
if (saf) dataset <- to_fct(dataset, safx)
as.data.frame(dataset, stringsAsFactors = FALSE) %>%
{
set_colnames(., fix_names(colnames(.)))
} %>%
set_attr("description", rprob)
}
}
load_user_data <- function(fname, uFile, ext, header = TRUE,
man_str_as_factor = TRUE, sep = ",",
dec = ".", n_max = Inf, xlsx_sheet = 1, xlsx_header = TRUE) {
filename <- basename(fname)
fext <- tools::file_ext(filename) %>% tolower()
## switch extension if needed
ext <- case_when(
fext == ext ~ ext,
fext == "rdata" ~ "rdata",
fext == "rds" && ext == "rda" ~ "rds",
fext == "rda" && ext == "rds" ~ "rda",
fext == "txt" && ext == "csv" ~ "txt",
fext == "tsv" && ext == "csv" ~ "tsv",
fext %in% c("xls", "xlsx") ~ "xlsx",
TRUE ~ ext
)
## objname is used as the name of the data.frame, make case insensitive
objname <- sub(glue("\\.{ext}$"), "", filename, ignore.case = TRUE)
## if ext isn't in the filename nothing was replaced and so ...
if (objname == filename && !fext %in% c("xls", "xlsx")) {
ret <- glue(i18n$t('#### The filename extension "{fext}" does not match the specified \\
file-type "{ext}". Please specify the file type you are trying to upload'))
upload_error_handler(objname, ret)
ext <- "---"
}
ldir <- getOption("radiant.launch_dir", default = radiant.data::find_home())
pdir <- getOption("radiant.project_dir", default = ldir)
cmd <- NULL
pp <- suppressMessages(
radiant.data::parse_path(
uFile,
pdir = pdir,
chr = "\"",
mess = FALSE
)
)
## can't have spaces, dashes, etc. in objectname
objname <- radiant.data::fix_names(objname)
if (ext %in% c("rda", "rdata")) {
## objname will hold the name of the object(s) inside the R datafile
robjname <- try(load(uFile), silent = TRUE)
if (inherits(robjname, "try-error")) {
upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in rda format."))
} else if (length(robjname) > 1) {
if (sum(robjname %in% c("r_state", "r_data", "r_info")) > 1) {
upload_error_handler(objname, i18n$t("#### To restore state select 'radiant state file' from the 'Load data of type' drowdown before loading the file"))
## need to remove the local copies of r_state, r_data, and r_info
suppressWarnings(rm(r_state, r_data, r_info))
} else {
upload_error_handler(objname, i18n$t("#### More than one R object contained in the data."))
}
} else {
r_data[[objname]] <- as.data.frame(get(robjname), stringsAsFactors = FALSE)
cmd <- glue("{objname} <- load({pp$rpath}) %>% get()")
}
} else if (ext == "rds") {
## objname will hold the name of the object(s) inside the R datafile
robj <- try(readRDS(uFile), silent = TRUE)
if (inherits(robj, "try-error")) {
upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in rds format."))
} else {
r_data[[objname]] <- as.data.frame(robj, stringsAsFactors = FALSE)
cmd <- glue("{objname} <- readr::read_rds({pp$rpath})")
}
} else if (ext == "parquet") {
if (!requireNamespace("arrow", quietly = TRUE)) {
stop(i18n$t("The 'arrow' package is not installed. Please install it and try again."))
upload_error_handler(objname, i18n$t("#### The arrow package required to work with data in parquet format is not installed. Please use install.packages('arrow')"))
} else {
robj <- arrow::read_parquet(uFile) # %>% set_attr("description", feather::feather_metadata(uFile)$description)
if (inherits(robj, "try-error")) {
upload_error_handler(objname, i18n$t("#### There was an error loading the data. Please make sure the data are in parquet format."))
} else {
r_data[[objname]] <- as.data.frame(robj, stringsAsFactors = FALSE)
cmd <- glue("{objname} <- arrow::read_parquet({pp$rpath})")
}
}
}else if (ext == "xlsx") {
if (!requireNamespace("readxl", quietly = TRUE)) {
upload_error_handler(objname, i18n$t("#### 需要安装 readxl 包"))
return()
}
# 1. 获取所有 Sheet
all_sheets <- try(readxl::excel_sheets(path = uFile), silent = TRUE)
if (inherits(all_sheets, "try-error")) {
upload_error_handler(objname, i18n$t("#### 无法解析 Excel 文件结构"))
return()
}
loaded_count <- 0
# 2. 循环处理
for (sheet_name in all_sheets) {
# 2.1 准备合法的变量名
clean_name <- make.names(sheet_name)
if (clean_name == "") clean_name <- paste0("Sheet_", sample(1000, 1))
# 2.2 读取数据
raw_data <- try(readxl::read_excel(
path = uFile,
sheet = sheet_name,
col_names = xlsx_header
), silent = TRUE)
# 2.3 卫语句:跳过无效数据(空表或读取错误)
if (inherits(raw_data, "try-error") || !is.data.frame(raw_data) || ncol(raw_data) == 0) {
next
}
# 2.4 [关键] 立即执行 Radiant 标准数据清洗流程
# 必须在循环内做,因为我们不会走到函数底部的通用处理逻辑了
try({
# 清洗列名
colnames(raw_data) <- radiant.data::fix_names(colnames(raw_data))
# 转换因子
if (man_str_as_factor) raw_data <- radiant.data::to_fct(raw_data)
}, silent = TRUE)
# 2.5 入库
r_data[[clean_name]] <- as.data.frame(raw_data, stringsAsFactors = FALSE)
# 2.6 生成并注册复现代码 (Reproducibility)
# 注意:这里直接注册 register,不需要 cmd 变量在外部累加
curr_cmd <- glue::glue('
{clean_name} <- readxl::read_excel("{uFile}", sheet = "{sheet_name}", col_names = {xlsx_header}) %>%
as.data.frame(stringsAsFactors = FALSE) %>%
fix_names()
{if (man_str_as_factor) paste0(clean_name, " <- radiant.data::to_fct(", clean_name, ")") else ""}
register("{clean_name}")
')
# 2.7 更新系统元数据 (Mimic bottom logic)
if (exists(clean_name, envir = r_data) && !bindingIsActive(as.symbol(clean_name), env = r_data)) {
shiny::makeReactiveBinding(clean_name, env = r_data)
}
r_info[[glue("{clean_name}_lcmd")]] <- curr_cmd
r_info[["datasetlist"]] <- unique(c(clean_name, r_info[["datasetlist"]]))
loaded_count <- loaded_count + 1
}
if (loaded_count == 0) {
upload_error_handler(objname, i18n$t("#### 文件中未发现有效的表格数据"))
}
# 直接返回
return()
}else if (ext %in% c("tsv", "csv", "txt")) {
r_data[[objname]] <- load_csv(
uFile,
delim = sep, col_names = header, n_max = n_max,
dec = dec, saf = man_str_as_factor
) %>%
(function(x) if (is.character(x)) upload_error_handler(objname, i18n$t("#### There was an error loading the data")) else x)
n_max <- if (is_not(n_max) || n_max < 0) Inf else n_max
if (ext == "csv" && sep == "," && dec == "." && header) {
cmd <- glue("{objname} <- readr::read_csv({pp$rpath}, n_max = {n_max})")
} else {
cmd <- glue('
{objname} <- readr::read_delim(
{pp$rpath},
delim = "{sep}", col_names = {header}, n_max = {n_max},
locale = readr::locale(decimal_mark = "{dec}", grouping_mark = "{sep}")
)')
}
## make sure all columns names are "fixed"
cmd <- paste0(cmd, " %>%\n fix_names()")
if (man_str_as_factor) cmd <- paste0(cmd, " %>%\n to_fct()")
} else if (ext != "---") {
ret <- glue(i18n$t("#### The selected filetype is not currently supported ({fext})"))
upload_error_handler(objname, ret)
}
# ===========================================================================
# 下面的代码只为单数据集流程(CSV, RDS, parquet)服务
# XLSX 流程已经在上面 return() 了,不会执行到这里
# ===========================================================================
if (exists(objname, envir = r_data) && !bindingIsActive(as.symbol(objname), env = r_data)) {
shiny::makeReactiveBinding(objname, env = r_data)
}
r_info[[glue("{objname}_descr")]] <- attr(r_data[[objname]], "description")
if (!is.empty(cmd)) {
cn <- colnames(r_data[[objname]])
fn <- radiant.data::fix_names(cn)
if (!identical(cn, fn)) {
colnames(r_data[[objname]]) <- fn
cmd <- paste0(cmd, " %>%\n fix_names()")
}
cmd <- glue('{cmd}\nregister("{objname}")')
}
r_info[[glue("{objname}_lcmd")]] <- cmd
r_info[["datasetlist"]] <- c(objname, r_info[["datasetlist"]]) %>% unique()
}
load_description <- function(fname, uFile, objname) {
ldir <- getOption("radiant.launch_dir", default = radiant.data::find_home())
pdir <- getOption("radiant.project_dir", default = ldir)
cmd <- NULL
pp <- suppressMessages(
radiant.data::parse_path(
uFile,
pdir = pdir,
chr = "\"",
mess = FALSE
)
)
descr <- readLines(pp$path, warn = FALSE) %>% paste0(collapse = "\n")
cmd <- glue("register(\"{objname}\", descr = paste0(readLines({pp$rpath}, warn = FALSE), collapse = \"\\n\"))")
attr(r_data[[objname]], "description") <- descr
r_info[[glue("{objname}_descr")]] <- descr
r_info[[glue("{objname}_lcmd")]] <- sub(glue('register("{objname}")'), cmd, r_info[[glue("{objname}_lcmd")]], fixed = TRUE)
}